linux/drivers/net/ethernet/intel/igc/igc_main.c
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c)  2018 Intel Corporation */
   3
   4#include <linux/module.h>
   5#include <linux/types.h>
   6#include <linux/if_vlan.h>
   7#include <linux/aer.h>
   8#include <linux/tcp.h>
   9#include <linux/udp.h>
  10#include <linux/ip.h>
  11#include <linux/pm_runtime.h>
  12#include <net/pkt_sched.h>
  13#include <linux/bpf_trace.h>
  14#include <net/xdp_sock_drv.h>
  15#include <linux/pci.h>
  16
  17#include <net/ipv6.h>
  18
  19#include "igc.h"
  20#include "igc_hw.h"
  21#include "igc_tsn.h"
  22#include "igc_xdp.h"
  23
  24#define DRV_SUMMARY     "Intel(R) 2.5G Ethernet Linux Driver"
  25
  26#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
  27
  28#define IGC_XDP_PASS            0
  29#define IGC_XDP_CONSUMED        BIT(0)
  30#define IGC_XDP_TX              BIT(1)
  31#define IGC_XDP_REDIRECT        BIT(2)
  32
  33static int debug = -1;
  34
  35MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
  36MODULE_DESCRIPTION(DRV_SUMMARY);
  37MODULE_LICENSE("GPL v2");
  38module_param(debug, int, 0);
  39MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
  40
  41char igc_driver_name[] = "igc";
  42static const char igc_driver_string[] = DRV_SUMMARY;
  43static const char igc_copyright[] =
  44        "Copyright(c) 2018 Intel Corporation.";
  45
  46static const struct igc_info *igc_info_tbl[] = {
  47        [board_base] = &igc_base_info,
  48};
  49
  50static const struct pci_device_id igc_pci_tbl[] = {
  51        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
  52        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
  53        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
  54        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
  55        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
  56        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
  57        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base },
  58        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
  59        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base },
  60        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
  61        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
  62        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base },
  63        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base },
  64        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base },
  65        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base },
  66        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
  67        /* required last entry */
  68        {0, }
  69};
  70
  71MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
  72
  73enum latency_range {
  74        lowest_latency = 0,
  75        low_latency = 1,
  76        bulk_latency = 2,
  77        latency_invalid = 255
  78};
  79
  80void igc_reset(struct igc_adapter *adapter)
  81{
  82        struct net_device *dev = adapter->netdev;
  83        struct igc_hw *hw = &adapter->hw;
  84        struct igc_fc_info *fc = &hw->fc;
  85        u32 pba, hwm;
  86
  87        /* Repartition PBA for greater than 9k MTU if required */
  88        pba = IGC_PBA_34K;
  89
  90        /* flow control settings
  91         * The high water mark must be low enough to fit one full frame
  92         * after transmitting the pause frame.  As such we must have enough
  93         * space to allow for us to complete our current transmit and then
  94         * receive the frame that is in progress from the link partner.
  95         * Set it to:
  96         * - the full Rx FIFO size minus one full Tx plus one full Rx frame
  97         */
  98        hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
  99
 100        fc->high_water = hwm & 0xFFFFFFF0;      /* 16-byte granularity */
 101        fc->low_water = fc->high_water - 16;
 102        fc->pause_time = 0xFFFF;
 103        fc->send_xon = 1;
 104        fc->current_mode = fc->requested_mode;
 105
 106        hw->mac.ops.reset_hw(hw);
 107
 108        if (hw->mac.ops.init_hw(hw))
 109                netdev_err(dev, "Error on hardware initialization\n");
 110
 111        /* Re-establish EEE setting */
 112        igc_set_eee_i225(hw, true, true, true);
 113
 114        if (!netif_running(adapter->netdev))
 115                igc_power_down_phy_copper_base(&adapter->hw);
 116
 117        /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */
 118        wr32(IGC_VET, ETH_P_8021Q);
 119
 120        /* Re-enable PTP, where applicable. */
 121        igc_ptp_reset(adapter);
 122
 123        /* Re-enable TSN offloading, where applicable. */
 124        igc_tsn_reset(adapter);
 125
 126        igc_get_phy_info(hw);
 127}
 128
 129/**
 130 * igc_power_up_link - Power up the phy link
 131 * @adapter: address of board private structure
 132 */
 133static void igc_power_up_link(struct igc_adapter *adapter)
 134{
 135        igc_reset_phy(&adapter->hw);
 136
 137        igc_power_up_phy_copper(&adapter->hw);
 138
 139        igc_setup_link(&adapter->hw);
 140}
 141
 142/**
 143 * igc_release_hw_control - release control of the h/w to f/w
 144 * @adapter: address of board private structure
 145 *
 146 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 147 * For ASF and Pass Through versions of f/w this means that the
 148 * driver is no longer loaded.
 149 */
 150static void igc_release_hw_control(struct igc_adapter *adapter)
 151{
 152        struct igc_hw *hw = &adapter->hw;
 153        u32 ctrl_ext;
 154
 155        if (!pci_device_is_present(adapter->pdev))
 156                return;
 157
 158        /* Let firmware take over control of h/w */
 159        ctrl_ext = rd32(IGC_CTRL_EXT);
 160        wr32(IGC_CTRL_EXT,
 161             ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
 162}
 163
 164/**
 165 * igc_get_hw_control - get control of the h/w from f/w
 166 * @adapter: address of board private structure
 167 *
 168 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 169 * For ASF and Pass Through versions of f/w this means that
 170 * the driver is loaded.
 171 */
 172static void igc_get_hw_control(struct igc_adapter *adapter)
 173{
 174        struct igc_hw *hw = &adapter->hw;
 175        u32 ctrl_ext;
 176
 177        /* Let firmware know the driver has taken over */
 178        ctrl_ext = rd32(IGC_CTRL_EXT);
 179        wr32(IGC_CTRL_EXT,
 180             ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
 181}
 182
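     /* Unmap a single Tx DMA mapping and zero its recorded length so the
      * same buffer is not unmapped again on a later cleanup pass.
      */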
 183static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf)
 184{
 185        dma_unmap_single(dev, dma_unmap_addr(buf, dma),
 186                         dma_unmap_len(buf, len), DMA_TO_DEVICE);
 187
 188        dma_unmap_len_set(buf, len, 0);
 189}
 190
 191/**
 192 * igc_clean_tx_ring - Free Tx Buffers
 193 * @tx_ring: ring to be cleaned
 194 */
 195static void igc_clean_tx_ring(struct igc_ring *tx_ring)
 196{
 197        u16 i = tx_ring->next_to_clean;
 198        struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
 199        u32 xsk_frames = 0;
 200
 201        while (i != tx_ring->next_to_use) {
 202                union igc_adv_tx_desc *eop_desc, *tx_desc;
 203
 204                switch (tx_buffer->type) {
 205                case IGC_TX_BUFFER_TYPE_XSK:
 206                        xsk_frames++;
 207                        break;
 208                case IGC_TX_BUFFER_TYPE_XDP:
 209                        xdp_return_frame(tx_buffer->xdpf);
 210                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 211                        break;
 212                case IGC_TX_BUFFER_TYPE_SKB:
 213                        dev_kfree_skb_any(tx_buffer->skb);
 214                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 215                        break;
 216                default:
 217                        netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
 218                        break;
 219                }
 220
 221                /* check for eop_desc to determine the end of the packet */
 222                eop_desc = tx_buffer->next_to_watch;
 223                tx_desc = IGC_TX_DESC(tx_ring, i);
 224
 225                /* unmap remaining buffers */
 226                while (tx_desc != eop_desc) {
 227                        tx_buffer++;
 228                        tx_desc++;
 229                        i++;
 230                        if (unlikely(i == tx_ring->count)) {
 231                                i = 0;
 232                                tx_buffer = tx_ring->tx_buffer_info;
 233                                tx_desc = IGC_TX_DESC(tx_ring, 0);
 234                        }
 235
 236                        /* unmap any remaining paged data */
 237                        if (dma_unmap_len(tx_buffer, len))
 238                                igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 239                }
 240
 241                tx_buffer->next_to_watch = NULL;
 242
 243                /* move us one more past the eop_desc for start of next pkt */
 244                tx_buffer++;
 245                i++;
 246                if (unlikely(i == tx_ring->count)) {
 247                        i = 0;
 248                        tx_buffer = tx_ring->tx_buffer_info;
 249                }
 250        }
 251
 252        if (tx_ring->xsk_pool && xsk_frames)
 253                xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
 254
 255        /* reset BQL for queue */
 256        netdev_tx_reset_queue(txring_txq(tx_ring));
 257
 258        /* reset next_to_use and next_to_clean */
 259        tx_ring->next_to_use = 0;
 260        tx_ring->next_to_clean = 0;
 261}
 262
 263/**
 264 * igc_free_tx_resources - Free Tx Resources per Queue
 265 * @tx_ring: Tx descriptor ring for a specific queue
 266 *
 267 * Free all transmit software resources
 268 */
 269void igc_free_tx_resources(struct igc_ring *tx_ring)
 270{
 271        igc_clean_tx_ring(tx_ring);
 272
 273        vfree(tx_ring->tx_buffer_info);
 274        tx_ring->tx_buffer_info = NULL;
 275
 276        /* if not set, then don't free */
 277        if (!tx_ring->desc)
 278                return;
 279
 280        dma_free_coherent(tx_ring->dev, tx_ring->size,
 281                          tx_ring->desc, tx_ring->dma);
 282
 283        tx_ring->desc = NULL;
 284}
 285
 286/**
 287 * igc_free_all_tx_resources - Free Tx Resources for All Queues
 288 * @adapter: board private structure
 289 *
 290 * Free all transmit software resources
 291 */
 292static void igc_free_all_tx_resources(struct igc_adapter *adapter)
 293{
 294        int i;
 295
 296        for (i = 0; i < adapter->num_tx_queues; i++)
 297                igc_free_tx_resources(adapter->tx_ring[i]);
 298}
 299
 300/**
 301 * igc_clean_all_tx_rings - Free Tx Buffers for all queues
 302 * @adapter: board private structure
 303 */
 304static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
 305{
 306        int i;
 307
 308        for (i = 0; i < adapter->num_tx_queues; i++)
 309                if (adapter->tx_ring[i])
 310                        igc_clean_tx_ring(adapter->tx_ring[i]);
 311}
 312
 313/**
 314 * igc_setup_tx_resources - allocate Tx resources (Descriptors)
 315 * @tx_ring: tx descriptor ring (for a specific queue) to setup
 316 *
 317 * Return 0 on success, negative on failure
 318 */
 319int igc_setup_tx_resources(struct igc_ring *tx_ring)
 320{
 321        struct net_device *ndev = tx_ring->netdev;
 322        struct device *dev = tx_ring->dev;
 323        int size = 0;
 324
 325        size = sizeof(struct igc_tx_buffer) * tx_ring->count;
 326        tx_ring->tx_buffer_info = vzalloc(size);
 327        if (!tx_ring->tx_buffer_info)
 328                goto err;
 329
 330        /* round up to nearest 4K */
 331        tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
 332        tx_ring->size = ALIGN(tx_ring->size, 4096);
 333
 334        tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 335                                           &tx_ring->dma, GFP_KERNEL);
 336
 337        if (!tx_ring->desc)
 338                goto err;
 339
 340        tx_ring->next_to_use = 0;
 341        tx_ring->next_to_clean = 0;
 342
 343        return 0;
 344
 345err:
 346        vfree(tx_ring->tx_buffer_info);
 347        netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n");
 348        return -ENOMEM;
 349}
 350
 351/**
 352 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
 353 * @adapter: board private structure
 354 *
 355 * Return 0 on success, negative on failure
 356 */
 357static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
 358{
 359        struct net_device *dev = adapter->netdev;
 360        int i, err = 0;
 361
 362        for (i = 0; i < adapter->num_tx_queues; i++) {
 363                err = igc_setup_tx_resources(adapter->tx_ring[i]);
 364                if (err) {
 365                        netdev_err(dev, "Error on Tx queue %u setup\n", i);
 366                        for (i--; i >= 0; i--)
 367                                igc_free_tx_resources(adapter->tx_ring[i]);
 368                        break;
 369                }
 370        }
 371
 372        return err;
 373}
 374
 375static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
 376{
 377        u16 i = rx_ring->next_to_clean;
 378
 379        dev_kfree_skb(rx_ring->skb);
 380        rx_ring->skb = NULL;
 381
 382        /* Free all the Rx ring sk_buffs */
 383        while (i != rx_ring->next_to_alloc) {
 384                struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
 385
 386                /* Invalidate cache lines that may have been written to by
 387                 * device so that we avoid corrupting memory.
 388                 */
 389                dma_sync_single_range_for_cpu(rx_ring->dev,
 390                                              buffer_info->dma,
 391                                              buffer_info->page_offset,
 392                                              igc_rx_bufsz(rx_ring),
 393                                              DMA_FROM_DEVICE);
 394
 395                /* free resources associated with mapping */
 396                dma_unmap_page_attrs(rx_ring->dev,
 397                                     buffer_info->dma,
 398                                     igc_rx_pg_size(rx_ring),
 399                                     DMA_FROM_DEVICE,
 400                                     IGC_RX_DMA_ATTR);
 401                __page_frag_cache_drain(buffer_info->page,
 402                                        buffer_info->pagecnt_bias);
 403
 404                i++;
 405                if (i == rx_ring->count)
 406                        i = 0;
 407        }
 408}
 409
 410static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring)
 411{
 412        struct igc_rx_buffer *bi;
 413        u16 i;
 414
 415        for (i = 0; i < ring->count; i++) {
 416                bi = &ring->rx_buffer_info[i];
 417                if (!bi->xdp)
 418                        continue;
 419
 420                xsk_buff_free(bi->xdp);
 421                bi->xdp = NULL;
 422        }
 423}
 424
 425/**
 426 * igc_clean_rx_ring - Free Rx Buffers per Queue
 427 * @ring: ring to free buffers from
 428 */
 429static void igc_clean_rx_ring(struct igc_ring *ring)
 430{
 431        if (ring->xsk_pool)
 432                igc_clean_rx_ring_xsk_pool(ring);
 433        else
 434                igc_clean_rx_ring_page_shared(ring);
 435
 436        clear_ring_uses_large_buffer(ring);
 437
 438        ring->next_to_alloc = 0;
 439        ring->next_to_clean = 0;
 440        ring->next_to_use = 0;
 441}
 442
 443/**
 444 * igc_clean_all_rx_rings - Free Rx Buffers for all queues
 445 * @adapter: board private structure
 446 */
 447static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
 448{
 449        int i;
 450
 451        for (i = 0; i < adapter->num_rx_queues; i++)
 452                if (adapter->rx_ring[i])
 453                        igc_clean_rx_ring(adapter->rx_ring[i]);
 454}
 455
 456/**
 457 * igc_free_rx_resources - Free Rx Resources
 458 * @rx_ring: ring to clean the resources from
 459 *
 460 * Free all receive software resources
 461 */
 462void igc_free_rx_resources(struct igc_ring *rx_ring)
 463{
 464        igc_clean_rx_ring(rx_ring);
 465
 466        xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 467
 468        vfree(rx_ring->rx_buffer_info);
 469        rx_ring->rx_buffer_info = NULL;
 470
 471        /* if not set, then don't free */
 472        if (!rx_ring->desc)
 473                return;
 474
 475        dma_free_coherent(rx_ring->dev, rx_ring->size,
 476                          rx_ring->desc, rx_ring->dma);
 477
 478        rx_ring->desc = NULL;
 479}
 480
 481/**
 482 * igc_free_all_rx_resources - Free Rx Resources for All Queues
 483 * @adapter: board private structure
 484 *
 485 * Free all receive software resources
 486 */
 487static void igc_free_all_rx_resources(struct igc_adapter *adapter)
 488{
 489        int i;
 490
 491        for (i = 0; i < adapter->num_rx_queues; i++)
 492                igc_free_rx_resources(adapter->rx_ring[i]);
 493}
 494
 495/**
 496 * igc_setup_rx_resources - allocate Rx resources (Descriptors)
 497 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
 498 *
 499 * Returns 0 on success, negative on failure
 500 */
 501int igc_setup_rx_resources(struct igc_ring *rx_ring)
 502{
 503        struct net_device *ndev = rx_ring->netdev;
 504        struct device *dev = rx_ring->dev;
 505        u8 index = rx_ring->queue_index;
 506        int size, desc_len, res;
 507
 508        res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
 509                               rx_ring->q_vector->napi.napi_id);
 510        if (res < 0) {
 511                netdev_err(ndev, "Failed to register xdp_rxq index %u\n",
 512                           index);
 513                return res;
 514        }
 515
 516        size = sizeof(struct igc_rx_buffer) * rx_ring->count;
 517        rx_ring->rx_buffer_info = vzalloc(size);
 518        if (!rx_ring->rx_buffer_info)
 519                goto err;
 520
 521        desc_len = sizeof(union igc_adv_rx_desc);
 522
 523        /* Round up to nearest 4K */
 524        rx_ring->size = rx_ring->count * desc_len;
 525        rx_ring->size = ALIGN(rx_ring->size, 4096);
 526
 527        rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
 528                                           &rx_ring->dma, GFP_KERNEL);
 529
 530        if (!rx_ring->desc)
 531                goto err;
 532
 533        rx_ring->next_to_alloc = 0;
 534        rx_ring->next_to_clean = 0;
 535        rx_ring->next_to_use = 0;
 536
 537        return 0;
 538
 539err:
 540        xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 541        vfree(rx_ring->rx_buffer_info);
 542        rx_ring->rx_buffer_info = NULL;
 543        netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
 544        return -ENOMEM;
 545}
 546
 547/**
 548 * igc_setup_all_rx_resources - wrapper to allocate Rx resources
 549 *                                (Descriptors) for all queues
 550 * @adapter: board private structure
 551 *
 552 * Return 0 on success, negative on failure
 553 */
 554static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
 555{
 556        struct net_device *dev = adapter->netdev;
 557        int i, err = 0;
 558
 559        for (i = 0; i < adapter->num_rx_queues; i++) {
 560                err = igc_setup_rx_resources(adapter->rx_ring[i]);
 561                if (err) {
 562                        netdev_err(dev, "Error on Rx queue %u setup\n", i);
 563                        for (i--; i >= 0; i--)
 564                                igc_free_rx_resources(adapter->rx_ring[i]);
 565                        break;
 566                }
 567        }
 568
 569        return err;
 570}
 571
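     /* Return the AF_XDP buffer pool bound to this queue, or NULL when XDP
      * is not enabled on the adapter or zero-copy is not set for the ring.
      */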
 572static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter,
 573                                              struct igc_ring *ring)
 574{
 575        if (!igc_xdp_is_enabled(adapter) ||
 576            !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags))
 577                return NULL;
 578
 579        return xsk_get_pool_from_qid(ring->netdev, ring->queue_index);
 580}
 581
 582/**
 583 * igc_configure_rx_ring - Configure a receive ring after Reset
 584 * @adapter: board private structure
 585 * @ring: receive ring to be configured
 586 *
 587 * Configure the Rx unit of the MAC after a reset.
 588 */
 589static void igc_configure_rx_ring(struct igc_adapter *adapter,
 590                                  struct igc_ring *ring)
 591{
 592        struct igc_hw *hw = &adapter->hw;
 593        union igc_adv_rx_desc *rx_desc;
 594        int reg_idx = ring->reg_idx;
 595        u32 srrctl = 0, rxdctl = 0;
 596        u64 rdba = ring->dma;
 597        u32 buf_size;
 598
 599        xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
 600        ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
 601        if (ring->xsk_pool) {
 602                WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 603                                                   MEM_TYPE_XSK_BUFF_POOL,
 604                                                   NULL));
 605                xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
 606        } else {
 607                WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 608                                                   MEM_TYPE_PAGE_SHARED,
 609                                                   NULL));
 610        }
 611
 612        if (igc_xdp_is_enabled(adapter))
 613                set_ring_uses_large_buffer(ring);
 614
 615        /* disable the queue */
 616        wr32(IGC_RXDCTL(reg_idx), 0);
 617
 618        /* Set DMA base address registers */
 619        wr32(IGC_RDBAL(reg_idx),
 620             rdba & 0x00000000ffffffffULL);
 621        wr32(IGC_RDBAH(reg_idx), rdba >> 32);
 622        wr32(IGC_RDLEN(reg_idx),
 623             ring->count * sizeof(union igc_adv_rx_desc));
 624
 625        /* initialize head and tail */
 626        ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
 627        wr32(IGC_RDH(reg_idx), 0);
 628        writel(0, ring->tail);
 629
  630        /* reset next-to-use/clean to place SW in sync with hardware */
 631        ring->next_to_clean = 0;
 632        ring->next_to_use = 0;
 633
 634        if (ring->xsk_pool)
 635                buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
 636        else if (ring_uses_large_buffer(ring))
 637                buf_size = IGC_RXBUFFER_3072;
 638        else
 639                buf_size = IGC_RXBUFFER_2048;
 640
 641        srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
 642        srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
 643        srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
 644
 645        wr32(IGC_SRRCTL(reg_idx), srrctl);
 646
 647        rxdctl |= IGC_RX_PTHRESH;
 648        rxdctl |= IGC_RX_HTHRESH << 8;
 649        rxdctl |= IGC_RX_WTHRESH << 16;
 650
 651        /* initialize rx_buffer_info */
 652        memset(ring->rx_buffer_info, 0,
 653               sizeof(struct igc_rx_buffer) * ring->count);
 654
 655        /* initialize Rx descriptor 0 */
 656        rx_desc = IGC_RX_DESC(ring, 0);
 657        rx_desc->wb.upper.length = 0;
 658
 659        /* enable receive descriptor fetching */
 660        rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
 661
 662        wr32(IGC_RXDCTL(reg_idx), rxdctl);
 663}
 664
 665/**
 666 * igc_configure_rx - Configure receive Unit after Reset
 667 * @adapter: board private structure
 668 *
 669 * Configure the Rx unit of the MAC after a reset.
 670 */
 671static void igc_configure_rx(struct igc_adapter *adapter)
 672{
 673        int i;
 674
 675        /* Setup the HW Rx Head and Tail Descriptor Pointers and
 676         * the Base and Length of the Rx Descriptor Ring
 677         */
 678        for (i = 0; i < adapter->num_rx_queues; i++)
 679                igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
 680}
 681
 682/**
 683 * igc_configure_tx_ring - Configure transmit ring after Reset
 684 * @adapter: board private structure
 685 * @ring: tx ring to configure
 686 *
 687 * Configure a transmit ring after a reset.
 688 */
 689static void igc_configure_tx_ring(struct igc_adapter *adapter,
 690                                  struct igc_ring *ring)
 691{
 692        struct igc_hw *hw = &adapter->hw;
 693        int reg_idx = ring->reg_idx;
 694        u64 tdba = ring->dma;
 695        u32 txdctl = 0;
 696
 697        ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
 698
 699        /* disable the queue */
 700        wr32(IGC_TXDCTL(reg_idx), 0);
 701        wrfl();
 702        mdelay(10);
 703
 704        wr32(IGC_TDLEN(reg_idx),
 705             ring->count * sizeof(union igc_adv_tx_desc));
 706        wr32(IGC_TDBAL(reg_idx),
 707             tdba & 0x00000000ffffffffULL);
 708        wr32(IGC_TDBAH(reg_idx), tdba >> 32);
 709
 710        ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
 711        wr32(IGC_TDH(reg_idx), 0);
 712        writel(0, ring->tail);
 713
 714        txdctl |= IGC_TX_PTHRESH;
 715        txdctl |= IGC_TX_HTHRESH << 8;
 716        txdctl |= IGC_TX_WTHRESH << 16;
 717
 718        txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
 719        wr32(IGC_TXDCTL(reg_idx), txdctl);
 720}
 721
 722/**
 723 * igc_configure_tx - Configure transmit Unit after Reset
 724 * @adapter: board private structure
 725 *
 726 * Configure the Tx unit of the MAC after a reset.
 727 */
 728static void igc_configure_tx(struct igc_adapter *adapter)
 729{
 730        int i;
 731
 732        for (i = 0; i < adapter->num_tx_queues; i++)
 733                igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
 734}
 735
 736/**
 737 * igc_setup_mrqc - configure the multiple receive queue control registers
 738 * @adapter: Board private structure
 739 */
 740static void igc_setup_mrqc(struct igc_adapter *adapter)
 741{
 742        struct igc_hw *hw = &adapter->hw;
 743        u32 j, num_rx_queues;
 744        u32 mrqc, rxcsum;
 745        u32 rss_key[10];
 746
 747        netdev_rss_key_fill(rss_key, sizeof(rss_key));
 748        for (j = 0; j < 10; j++)
 749                wr32(IGC_RSSRK(j), rss_key[j]);
 750
 751        num_rx_queues = adapter->rss_queues;
 752
 753        if (adapter->rss_indir_tbl_init != num_rx_queues) {
 754                for (j = 0; j < IGC_RETA_SIZE; j++)
 755                        adapter->rss_indir_tbl[j] =
 756                        (j * num_rx_queues) / IGC_RETA_SIZE;
 757                adapter->rss_indir_tbl_init = num_rx_queues;
 758        }
 759        igc_write_rss_indir_tbl(adapter);
 760
 761        /* Disable raw packet checksumming so that RSS hash is placed in
 762         * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
 763         * offloads as they are enabled by default
 764         */
 765        rxcsum = rd32(IGC_RXCSUM);
 766        rxcsum |= IGC_RXCSUM_PCSD;
 767
 768        /* Enable Receive Checksum Offload for SCTP */
 769        rxcsum |= IGC_RXCSUM_CRCOFL;
 770
 771        /* Don't need to set TUOFL or IPOFL, they default to 1 */
 772        wr32(IGC_RXCSUM, rxcsum);
 773
 774        /* Generate RSS hash based on packet types, TCP/UDP
 775         * port numbers and/or IPv4/v6 src and dst addresses
 776         */
 777        mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
 778               IGC_MRQC_RSS_FIELD_IPV4_TCP |
 779               IGC_MRQC_RSS_FIELD_IPV6 |
 780               IGC_MRQC_RSS_FIELD_IPV6_TCP |
 781               IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
 782
 783        if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
 784                mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
 785        if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
 786                mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
 787
 788        mrqc |= IGC_MRQC_ENABLE_RSS_MQ;
 789
 790        wr32(IGC_MRQC, mrqc);
 791}
 792
 793/**
 794 * igc_setup_rctl - configure the receive control registers
 795 * @adapter: Board private structure
 796 */
 797static void igc_setup_rctl(struct igc_adapter *adapter)
 798{
 799        struct igc_hw *hw = &adapter->hw;
 800        u32 rctl;
 801
 802        rctl = rd32(IGC_RCTL);
 803
 804        rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
 805        rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
 806
 807        rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
 808                (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
 809
 810        /* enable stripping of CRC. Newer features require
 811         * that the HW strips the CRC.
 812         */
 813        rctl |= IGC_RCTL_SECRC;
 814
 815        /* disable store bad packets and clear size bits. */
 816        rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
 817
 818        /* enable LPE to allow for reception of jumbo frames */
 819        rctl |= IGC_RCTL_LPE;
 820
 821        /* disable queue 0 to prevent tail write w/o re-config */
 822        wr32(IGC_RXDCTL(0), 0);
 823
 824        /* This is useful for sniffing bad packets. */
 825        if (adapter->netdev->features & NETIF_F_RXALL) {
 826                /* UPE and MPE will be handled by normal PROMISC logic
 827                 * in set_rx_mode
 828                 */
 829                rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
 830                         IGC_RCTL_BAM | /* RX All Bcast Pkts */
 831                         IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
 832
 833                rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
 834                          IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
 835        }
 836
 837        wr32(IGC_RCTL, rctl);
 838}
 839
 840/**
 841 * igc_setup_tctl - configure the transmit control registers
 842 * @adapter: Board private structure
 843 */
 844static void igc_setup_tctl(struct igc_adapter *adapter)
 845{
 846        struct igc_hw *hw = &adapter->hw;
 847        u32 tctl;
 848
  849        /* disable queue 0 which could be enabled by default */
 850        wr32(IGC_TXDCTL(0), 0);
 851
 852        /* Program the Transmit Control Register */
 853        tctl = rd32(IGC_TCTL);
 854        tctl &= ~IGC_TCTL_CT;
 855        tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
 856                (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
 857
 858        /* Enable transmits */
 859        tctl |= IGC_TCTL_EN;
 860
 861        wr32(IGC_TCTL, tctl);
 862}
 863
 864/**
 865 * igc_set_mac_filter_hw() - Set MAC address filter in hardware
 866 * @adapter: Pointer to adapter where the filter should be set
 867 * @index: Filter index
 868 * @type: MAC address filter type (source or destination)
 869 * @addr: MAC address
 870 * @queue: If non-negative, queue assignment feature is enabled and frames
 871 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
 872 *         assignment is disabled.
 873 */
 874static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
 875                                  enum igc_mac_filter_type type,
 876                                  const u8 *addr, int queue)
 877{
 878        struct net_device *dev = adapter->netdev;
 879        struct igc_hw *hw = &adapter->hw;
 880        u32 ral, rah;
 881
 882        if (WARN_ON(index >= hw->mac.rar_entry_count))
 883                return;
 884
 885        ral = le32_to_cpup((__le32 *)(addr));
 886        rah = le16_to_cpup((__le16 *)(addr + 4));
 887
 888        if (type == IGC_MAC_FILTER_TYPE_SRC) {
 889                rah &= ~IGC_RAH_ASEL_MASK;
 890                rah |= IGC_RAH_ASEL_SRC_ADDR;
 891        }
 892
 893        if (queue >= 0) {
 894                rah &= ~IGC_RAH_QSEL_MASK;
 895                rah |= (queue << IGC_RAH_QSEL_SHIFT);
 896                rah |= IGC_RAH_QSEL_ENABLE;
 897        }
 898
 899        rah |= IGC_RAH_AV;
 900
 901        wr32(IGC_RAL(index), ral);
 902        wr32(IGC_RAH(index), rah);
 903
 904        netdev_dbg(dev, "MAC address filter set in HW: index %d", index);
 905}
 906
 907/**
 908 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware
 909 * @adapter: Pointer to adapter where the filter should be cleared
 910 * @index: Filter index
 911 */
 912static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
 913{
 914        struct net_device *dev = adapter->netdev;
 915        struct igc_hw *hw = &adapter->hw;
 916
 917        if (WARN_ON(index >= hw->mac.rar_entry_count))
 918                return;
 919
 920        wr32(IGC_RAL(index), 0);
 921        wr32(IGC_RAH(index), 0);
 922
 923        netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index);
 924}
 925
 926/* Set default MAC address for the PF in the first RAR entry */
 927static void igc_set_default_mac_filter(struct igc_adapter *adapter)
 928{
 929        struct net_device *dev = adapter->netdev;
 930        u8 *addr = adapter->hw.mac.addr;
 931
 932        netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
 933
 934        igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1);
 935}
 936
 937/**
 938 * igc_set_mac - Change the Ethernet Address of the NIC
 939 * @netdev: network interface device structure
 940 * @p: pointer to an address structure
 941 *
 942 * Returns 0 on success, negative on failure
 943 */
 944static int igc_set_mac(struct net_device *netdev, void *p)
 945{
 946        struct igc_adapter *adapter = netdev_priv(netdev);
 947        struct igc_hw *hw = &adapter->hw;
 948        struct sockaddr *addr = p;
 949
 950        if (!is_valid_ether_addr(addr->sa_data))
 951                return -EADDRNOTAVAIL;
 952
 953        eth_hw_addr_set(netdev, addr->sa_data);
 954        memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
 955
 956        /* set the correct pool for the new PF MAC address in entry 0 */
 957        igc_set_default_mac_filter(adapter);
 958
 959        return 0;
 960}
 961
 962/**
 963 *  igc_write_mc_addr_list - write multicast addresses to MTA
 964 *  @netdev: network interface device structure
 965 *
 966 *  Writes multicast address list to the MTA hash table.
 967 *  Returns: -ENOMEM on failure
 968 *           0 on no addresses written
 969 *           X on writing X addresses to MTA
 970 **/
 971static int igc_write_mc_addr_list(struct net_device *netdev)
 972{
 973        struct igc_adapter *adapter = netdev_priv(netdev);
 974        struct igc_hw *hw = &adapter->hw;
 975        struct netdev_hw_addr *ha;
 976        u8  *mta_list;
 977        int i;
 978
 979        if (netdev_mc_empty(netdev)) {
 980                /* nothing to program, so clear mc list */
 981                igc_update_mc_addr_list(hw, NULL, 0);
 982                return 0;
 983        }
 984
 985        mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
 986        if (!mta_list)
 987                return -ENOMEM;
 988
 989        /* The shared function expects a packed array of only addresses. */
 990        i = 0;
 991        netdev_for_each_mc_addr(ha, netdev)
 992                memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
 993
 994        igc_update_mc_addr_list(hw, mta_list, i);
 995        kfree(mta_list);
 996
 997        return netdev_mc_count(netdev);
 998}
 999
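     /* Convert an absolute transmit time into the launch time value written
      * to the advanced Tx descriptor: the remainder of (txtime - base_time)
      * within the configured cycle_time.
      */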
1000static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime)
1001{
1002        ktime_t cycle_time = adapter->cycle_time;
1003        ktime_t base_time = adapter->base_time;
1004        u32 launchtime;
1005
1006        /* FIXME: when using ETF together with taprio, we may have a
1007         * case where 'delta' is larger than the cycle_time, this may
1008         * cause problems if we don't read the current value of
 1009         * IGC_BASET, as the value written into the launchtime
1010         * descriptor field may be misinterpreted.
1011         */
1012        div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime);
1013
1014        return cpu_to_le32(launchtime);
1015}
1016
1017static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
1018                            struct igc_tx_buffer *first,
1019                            u32 vlan_macip_lens, u32 type_tucmd,
1020                            u32 mss_l4len_idx)
1021{
1022        struct igc_adv_tx_context_desc *context_desc;
1023        u16 i = tx_ring->next_to_use;
1024
1025        context_desc = IGC_TX_CTXTDESC(tx_ring, i);
1026
1027        i++;
1028        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1029
1030        /* set bits to identify this as an advanced context descriptor */
1031        type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
1032
1033        /* For i225, context index must be unique per ring. */
1034        if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
1035                mss_l4len_idx |= tx_ring->reg_idx << 4;
1036
1037        context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
1038        context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
1039        context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
1040
1041        /* We assume there is always a valid Tx time available. Invalid times
1042         * should have been handled by the upper layers.
1043         */
1044        if (tx_ring->launchtime_enable) {
1045                struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1046                ktime_t txtime = first->skb->tstamp;
1047
1048                skb_txtime_consumed(first->skb);
1049                context_desc->launch_time = igc_tx_launchtime(adapter,
1050                                                              txtime);
1051        } else {
1052                context_desc->launch_time = 0;
1053        }
1054}
1055
1056static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
1057{
1058        struct sk_buff *skb = first->skb;
1059        u32 vlan_macip_lens = 0;
1060        u32 type_tucmd = 0;
1061
1062        if (skb->ip_summed != CHECKSUM_PARTIAL) {
1063csum_failed:
1064                if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
1065                    !tx_ring->launchtime_enable)
1066                        return;
1067                goto no_csum;
1068        }
1069
1070        switch (skb->csum_offset) {
1071        case offsetof(struct tcphdr, check):
1072                type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1073                fallthrough;
1074        case offsetof(struct udphdr, check):
1075                break;
1076        case offsetof(struct sctphdr, checksum):
1077                /* validate that this is actually an SCTP request */
1078                if (skb_csum_is_sctp(skb)) {
1079                        type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
1080                        break;
1081                }
1082                fallthrough;
1083        default:
1084                skb_checksum_help(skb);
1085                goto csum_failed;
1086        }
1087
1088        /* update TX checksum flag */
1089        first->tx_flags |= IGC_TX_FLAGS_CSUM;
1090        vlan_macip_lens = skb_checksum_start_offset(skb) -
1091                          skb_network_offset(skb);
1092no_csum:
1093        vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
1094        vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1095
1096        igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
1097}
1098
1099static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1100{
1101        struct net_device *netdev = tx_ring->netdev;
1102
1103        netif_stop_subqueue(netdev, tx_ring->queue_index);
1104
 1105        /* Ensure the queue stop above is visible before the re-check below */
1106        smp_mb();
1107
 1108        /* We need to check again in case another CPU has just
1109         * made room available.
1110         */
1111        if (igc_desc_unused(tx_ring) < size)
1112                return -EBUSY;
1113
1114        /* A reprieve! */
1115        netif_wake_subqueue(netdev, tx_ring->queue_index);
1116
1117        u64_stats_update_begin(&tx_ring->tx_syncp2);
1118        tx_ring->tx_stats.restart_queue2++;
1119        u64_stats_update_end(&tx_ring->tx_syncp2);
1120
1121        return 0;
1122}
1123
1124static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1125{
1126        if (igc_desc_unused(tx_ring) >= size)
1127                return 0;
1128        return __igc_maybe_stop_tx(tx_ring, size);
1129}
1130
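     /* Translate a flag bit in _input into the corresponding _result bit
      * without a runtime branch on the flag: scale up when _result is the
      * larger constant, scale down otherwise.
      */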
1131#define IGC_SET_FLAG(_input, _flag, _result) \
1132        (((_flag) <= (_result)) ?                               \
1133         ((u32)((_input) & (_flag)) * ((_result) / (_flag))) :  \
1134         ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
1135
1136static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
1137{
1138        /* set type for advanced descriptor with frame checksum insertion */
1139        u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
1140                       IGC_ADVTXD_DCMD_DEXT |
1141                       IGC_ADVTXD_DCMD_IFCS;
1142
1143        /* set HW vlan bit if vlan is present */
1144        cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN,
1145                                 IGC_ADVTXD_DCMD_VLE);
1146
1147        /* set segmentation bits for TSO */
1148        cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO,
1149                                 (IGC_ADVTXD_DCMD_TSE));
1150
1151        /* set timestamp bit if present */
1152        cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
1153                                 (IGC_ADVTXD_MAC_TSTAMP));
1154
 1155        /* clear frame checksum insertion when the skb requests no FCS */
1156        cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS);
1157
1158        return cmd_type;
1159}
1160
1161static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
1162                                 union igc_adv_tx_desc *tx_desc,
1163                                 u32 tx_flags, unsigned int paylen)
1164{
1165        u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
1166
1167        /* insert L4 checksum */
1168        olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
1169                          ((IGC_TXD_POPTS_TXSM << 8) /
1170                          IGC_TX_FLAGS_CSUM);
1171
1172        /* insert IPv4 checksum */
1173        olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
1174                          (((IGC_TXD_POPTS_IXSM << 8)) /
1175                          IGC_TX_FLAGS_IPV4);
1176
1177        tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
1178}
1179
1180static int igc_tx_map(struct igc_ring *tx_ring,
1181                      struct igc_tx_buffer *first,
1182                      const u8 hdr_len)
1183{
1184        struct sk_buff *skb = first->skb;
1185        struct igc_tx_buffer *tx_buffer;
1186        union igc_adv_tx_desc *tx_desc;
1187        u32 tx_flags = first->tx_flags;
1188        skb_frag_t *frag;
1189        u16 i = tx_ring->next_to_use;
1190        unsigned int data_len, size;
1191        dma_addr_t dma;
1192        u32 cmd_type;
1193
1194        cmd_type = igc_tx_cmd_type(skb, tx_flags);
1195        tx_desc = IGC_TX_DESC(tx_ring, i);
1196
1197        igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
1198
1199        size = skb_headlen(skb);
1200        data_len = skb->data_len;
1201
1202        dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
1203
1204        tx_buffer = first;
1205
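             /* Write data descriptors for the linear part of the skb first,
              * then for each page fragment, splitting any mapping larger
              * than IGC_MAX_DATA_PER_TXD across multiple descriptors.
              */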
1206        for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
1207                if (dma_mapping_error(tx_ring->dev, dma))
1208                        goto dma_error;
1209
1210                /* record length, and DMA address */
1211                dma_unmap_len_set(tx_buffer, len, size);
1212                dma_unmap_addr_set(tx_buffer, dma, dma);
1213
1214                tx_desc->read.buffer_addr = cpu_to_le64(dma);
1215
1216                while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
1217                        tx_desc->read.cmd_type_len =
1218                                cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
1219
1220                        i++;
1221                        tx_desc++;
1222                        if (i == tx_ring->count) {
1223                                tx_desc = IGC_TX_DESC(tx_ring, 0);
1224                                i = 0;
1225                        }
1226                        tx_desc->read.olinfo_status = 0;
1227
1228                        dma += IGC_MAX_DATA_PER_TXD;
1229                        size -= IGC_MAX_DATA_PER_TXD;
1230
1231                        tx_desc->read.buffer_addr = cpu_to_le64(dma);
1232                }
1233
1234                if (likely(!data_len))
1235                        break;
1236
1237                tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
1238
1239                i++;
1240                tx_desc++;
1241                if (i == tx_ring->count) {
1242                        tx_desc = IGC_TX_DESC(tx_ring, 0);
1243                        i = 0;
1244                }
1245                tx_desc->read.olinfo_status = 0;
1246
1247                size = skb_frag_size(frag);
1248                data_len -= size;
1249
1250                dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
1251                                       size, DMA_TO_DEVICE);
1252
1253                tx_buffer = &tx_ring->tx_buffer_info[i];
1254        }
1255
1256        /* write last descriptor with RS and EOP bits */
1257        cmd_type |= size | IGC_TXD_DCMD;
1258        tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
1259
1260        netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
1261
1262        /* set the timestamp */
1263        first->time_stamp = jiffies;
1264
1265        skb_tx_timestamp(skb);
1266
1267        /* Force memory writes to complete before letting h/w know there
1268         * are new descriptors to fetch.  (Only applicable for weak-ordered
1269         * memory model archs, such as IA-64).
1270         *
1271         * We also need this memory barrier to make certain all of the
1272         * status bits have been updated before next_to_watch is written.
1273         */
1274        wmb();
1275
1276        /* set next_to_watch value indicating a packet is present */
1277        first->next_to_watch = tx_desc;
1278
1279        i++;
1280        if (i == tx_ring->count)
1281                i = 0;
1282
1283        tx_ring->next_to_use = i;
1284
1285        /* Make sure there is space in the ring for the next send. */
1286        igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
1287
1288        if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
1289                writel(i, tx_ring->tail);
1290        }
1291
1292        return 0;
1293dma_error:
1294        netdev_err(tx_ring->netdev, "TX DMA map failed\n");
1295        tx_buffer = &tx_ring->tx_buffer_info[i];
1296
1297        /* clear dma mappings for failed tx_buffer_info map */
1298        while (tx_buffer != first) {
1299                if (dma_unmap_len(tx_buffer, len))
1300                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1301
1302                if (i-- == 0)
1303                        i += tx_ring->count;
1304                tx_buffer = &tx_ring->tx_buffer_info[i];
1305        }
1306
1307        if (dma_unmap_len(tx_buffer, len))
1308                igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1309
1310        dev_kfree_skb_any(tx_buffer->skb);
1311        tx_buffer->skb = NULL;
1312
1313        tx_ring->next_to_use = i;
1314
1315        return -1;
1316}
1317
1318static int igc_tso(struct igc_ring *tx_ring,
1319                   struct igc_tx_buffer *first,
1320                   u8 *hdr_len)
1321{
1322        u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
1323        struct sk_buff *skb = first->skb;
1324        union {
1325                struct iphdr *v4;
1326                struct ipv6hdr *v6;
1327                unsigned char *hdr;
1328        } ip;
1329        union {
1330                struct tcphdr *tcp;
1331                struct udphdr *udp;
1332                unsigned char *hdr;
1333        } l4;
1334        u32 paylen, l4_offset;
1335        int err;
1336
1337        if (skb->ip_summed != CHECKSUM_PARTIAL)
1338                return 0;
1339
1340        if (!skb_is_gso(skb))
1341                return 0;
1342
1343        err = skb_cow_head(skb, 0);
1344        if (err < 0)
1345                return err;
1346
1347        ip.hdr = skb_network_header(skb);
1348        l4.hdr = skb_checksum_start(skb);
1349
1350        /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
1351        type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1352
1353        /* initialize outer IP header fields */
1354        if (ip.v4->version == 4) {
1355                unsigned char *csum_start = skb_checksum_start(skb);
1356                unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
1357
1358                /* IP header will have to cancel out any data that
1359                 * is not a part of the outer IP header
1360                 */
1361                ip.v4->check = csum_fold(csum_partial(trans_start,
1362                                                      csum_start - trans_start,
1363                                                      0));
1364                type_tucmd |= IGC_ADVTXD_TUCMD_IPV4;
1365
1366                ip.v4->tot_len = 0;
1367                first->tx_flags |= IGC_TX_FLAGS_TSO |
1368                                   IGC_TX_FLAGS_CSUM |
1369                                   IGC_TX_FLAGS_IPV4;
1370        } else {
1371                ip.v6->payload_len = 0;
1372                first->tx_flags |= IGC_TX_FLAGS_TSO |
1373                                   IGC_TX_FLAGS_CSUM;
1374        }
1375
1376        /* determine offset of inner transport header */
1377        l4_offset = l4.hdr - skb->data;
1378
1379        /* remove payload length from inner checksum */
1380        paylen = skb->len - l4_offset;
1381        if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) {
1382                /* compute length of segmentation header */
1383                *hdr_len = (l4.tcp->doff * 4) + l4_offset;
1384                csum_replace_by_diff(&l4.tcp->check,
1385                                     (__force __wsum)htonl(paylen));
1386        } else {
1387                /* compute length of segmentation header */
1388                *hdr_len = sizeof(*l4.udp) + l4_offset;
1389                csum_replace_by_diff(&l4.udp->check,
1390                                     (__force __wsum)htonl(paylen));
1391        }
1392
1393        /* update gso size and bytecount with header size */
1394        first->gso_segs = skb_shinfo(skb)->gso_segs;
1395        first->bytecount += (first->gso_segs - 1) * *hdr_len;
1396
1397        /* MSS L4LEN IDX */
1398        mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT;
1399        mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT;
1400
1401        /* VLAN MACLEN IPLEN */
1402        vlan_macip_lens = l4.hdr - ip.hdr;
1403        vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
1404        vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1405
1406        igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
1407                        type_tucmd, mss_l4len_idx);
1408
1409        return 1;
1410}
1411
1412static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
1413                                       struct igc_ring *tx_ring)
1414{
1415        u16 count = TXD_USE_COUNT(skb_headlen(skb));
1416        __be16 protocol = vlan_get_protocol(skb);
1417        struct igc_tx_buffer *first;
1418        u32 tx_flags = 0;
1419        unsigned short f;
1420        u8 hdr_len = 0;
1421        int tso = 0;
1422
1423        /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
1424         *      + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
1425         *      + 2 desc gap to keep tail from touching head,
1426         *      + 1 desc for context descriptor,
1427         * otherwise try next time
1428         */
1429        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
1430                count += TXD_USE_COUNT(skb_frag_size(
1431                                                &skb_shinfo(skb)->frags[f]));
1432
1433        if (igc_maybe_stop_tx(tx_ring, count + 3)) {
1434                /* this is a hard error */
1435                return NETDEV_TX_BUSY;
1436        }
1437
1438        /* record the location of the first descriptor for this packet */
1439        first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
1440        first->type = IGC_TX_BUFFER_TYPE_SKB;
1441        first->skb = skb;
1442        first->bytecount = skb->len;
1443        first->gso_segs = 1;
1444
1445        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
1446                struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1447
1448                /* FIXME: add support for retrieving timestamps from
1449                 * the other timer registers before skipping the
1450                 * timestamping request.
1451                 */
1452                if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
1453                    !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
1454                                           &adapter->state)) {
1455                        skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1456                        tx_flags |= IGC_TX_FLAGS_TSTAMP;
1457
1458                        adapter->ptp_tx_skb = skb_get(skb);
1459                        adapter->ptp_tx_start = jiffies;
1460                } else {
1461                        adapter->tx_hwtstamp_skipped++;
1462                }
1463        }
1464
1465        if (skb_vlan_tag_present(skb)) {
1466                tx_flags |= IGC_TX_FLAGS_VLAN;
1467                tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT);
1468        }
1469
1470        /* record initial flags and protocol */
1471        first->tx_flags = tx_flags;
1472        first->protocol = protocol;
1473
1474        tso = igc_tso(tx_ring, first, &hdr_len);
1475        if (tso < 0)
1476                goto out_drop;
1477        else if (!tso)
1478                igc_tx_csum(tx_ring, first);
1479
1480        igc_tx_map(tx_ring, first, hdr_len);
1481
1482        return NETDEV_TX_OK;
1483
1484out_drop:
1485        dev_kfree_skb_any(first->skb);
1486        first->skb = NULL;
1487
1488        return NETDEV_TX_OK;
1489}
1490
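     /* Map the skb's queue_mapping onto a valid Tx ring, folding any
      * out-of-range index back into the number of configured Tx queues.
      */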
1491static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
1492                                                    struct sk_buff *skb)
1493{
1494        unsigned int r_idx = skb->queue_mapping;
1495
1496        if (r_idx >= adapter->num_tx_queues)
1497                r_idx = r_idx % adapter->num_tx_queues;
1498
1499        return adapter->tx_ring[r_idx];
1500}
1501
1502static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
1503                                  struct net_device *netdev)
1504{
1505        struct igc_adapter *adapter = netdev_priv(netdev);
1506
1507        /* The minimum packet size with TCTL.PSP set is 17 so pad the skb
1508         * in order to meet this minimum size requirement.
1509         */
1510        if (skb->len < 17) {
1511                if (skb_padto(skb, 17))
1512                        return NETDEV_TX_OK;
1513                skb->len = 17;
1514        }
1515
1516        return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
1517}
1518
1519static void igc_rx_checksum(struct igc_ring *ring,
1520                            union igc_adv_rx_desc *rx_desc,
1521                            struct sk_buff *skb)
1522{
1523        skb_checksum_none_assert(skb);
1524
1525        /* Ignore Checksum bit is set */
1526        if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
1527                return;
1528
1529        /* Rx checksum disabled via ethtool */
1530        if (!(ring->netdev->features & NETIF_F_RXCSUM))
1531                return;
1532
1533        /* TCP/UDP checksum error bit is set */
1534        if (igc_test_staterr(rx_desc,
1535                             IGC_RXDEXT_STATERR_L4E |
1536                             IGC_RXDEXT_STATERR_IPE)) {
1537                /* work around errata with sctp packets where the TCPE aka
1538                 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
1539                 * packets (aka let the stack check the crc32c)
1540                 */
1541                if (!(skb->len == 60 &&
1542                      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
1543                        u64_stats_update_begin(&ring->rx_syncp);
1544                        ring->rx_stats.csum_err++;
1545                        u64_stats_update_end(&ring->rx_syncp);
1546                }
1547                /* let the stack verify checksum errors */
1548                return;
1549        }
1550        /* It must be a TCP or UDP packet with a valid checksum */
1551        if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
1552                                      IGC_RXD_STAT_UDPCS))
1553                skb->ip_summed = CHECKSUM_UNNECESSARY;
1554
1555        netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
1556                   le32_to_cpu(rx_desc->wb.upper.status_error));
1557}
1558
1559static inline void igc_rx_hash(struct igc_ring *ring,
1560                               union igc_adv_rx_desc *rx_desc,
1561                               struct sk_buff *skb)
1562{
1563        if (ring->netdev->features & NETIF_F_RXHASH)
1564                skb_set_hash(skb,
1565                             le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
1566                             PKT_HASH_TYPE_L3);
1567}
1568
1569static void igc_rx_vlan(struct igc_ring *rx_ring,
1570                        union igc_adv_rx_desc *rx_desc,
1571                        struct sk_buff *skb)
1572{
1573        struct net_device *dev = rx_ring->netdev;
1574        u16 vid;
1575
1576        if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
1577            igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
1578                if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
1579                    test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
1580                        vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
1581                else
1582                        vid = le16_to_cpu(rx_desc->wb.upper.vlan);
1583
1584                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
1585        }
1586}
1587
1588/**
1589 * igc_process_skb_fields - Populate skb header fields from Rx descriptor
1590 * @rx_ring: rx descriptor ring packet is being transacted on
1591 * @rx_desc: pointer to the EOP Rx descriptor
1592 * @skb: pointer to current skb being populated
1593 *
1594 * This function checks the ring, descriptor, and packet information in order
1595 * to populate the hash, checksum, VLAN, protocol, and other fields within the
1596 * skb.
1597 */
1598static void igc_process_skb_fields(struct igc_ring *rx_ring,
1599                                   union igc_adv_rx_desc *rx_desc,
1600                                   struct sk_buff *skb)
1601{
1602        igc_rx_hash(rx_ring, rx_desc, skb);
1603
1604        igc_rx_checksum(rx_ring, rx_desc, skb);
1605
1606        igc_rx_vlan(rx_ring, rx_desc, skb);
1607
1608        skb_record_rx_queue(skb, rx_ring->queue_index);
1609
1610        skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1611}
1612
1613static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features)
1614{
1615        bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
1616        struct igc_adapter *adapter = netdev_priv(netdev);
1617        struct igc_hw *hw = &adapter->hw;
1618        u32 ctrl;
1619
1620        ctrl = rd32(IGC_CTRL);
1621
1622        if (enable) {
1623                /* enable VLAN tag insert/strip */
1624                ctrl |= IGC_CTRL_VME;
1625        } else {
1626                /* disable VLAN tag insert/strip */
1627                ctrl &= ~IGC_CTRL_VME;
1628        }
1629        wr32(IGC_CTRL, ctrl);
1630}
1631
1632static void igc_restore_vlan(struct igc_adapter *adapter)
1633{
1634        igc_vlan_mode(adapter->netdev, adapter->netdev->features);
1635}
1636
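/* Fetch the Rx buffer at next_to_clean, sync the received region for CPU
 * access and take a page reference (via pagecnt_bias) before the data is
 * handed off, so the page can later be recycled or released.
 */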
1637static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
1638                                               const unsigned int size,
1639                                               int *rx_buffer_pgcnt)
1640{
1641        struct igc_rx_buffer *rx_buffer;
1642
1643        rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
1644        *rx_buffer_pgcnt =
1645#if (PAGE_SIZE < 8192)
1646                page_count(rx_buffer->page);
1647#else
1648                0;
1649#endif
1650        prefetchw(rx_buffer->page);
1651
1652        /* we are reusing so sync this buffer for CPU use */
1653        dma_sync_single_range_for_cpu(rx_ring->dev,
1654                                      rx_buffer->dma,
1655                                      rx_buffer->page_offset,
1656                                      size,
1657                                      DMA_FROM_DEVICE);
1658
1659        rx_buffer->pagecnt_bias--;
1660
1661        return rx_buffer;
1662}
1663
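/* Advance the buffer offset past the consumed data: on pages smaller than
 * 8K the two half-page buffers are toggled, on larger pages the offset
 * advances by truesize.
 */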
1664static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
1665                               unsigned int truesize)
1666{
1667#if (PAGE_SIZE < 8192)
1668        buffer->page_offset ^= truesize;
1669#else
1670        buffer->page_offset += truesize;
1671#endif
1672}
1673
1674static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring,
1675                                              unsigned int size)
1676{
1677        unsigned int truesize;
1678
1679#if (PAGE_SIZE < 8192)
1680        truesize = igc_rx_pg_size(ring) / 2;
1681#else
1682        truesize = ring_uses_build_skb(ring) ?
1683                   SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
1684                   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1685                   SKB_DATA_ALIGN(size);
1686#endif
1687        return truesize;
1688}
1689
1690/**
1691 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
1692 * @rx_ring: rx descriptor ring to transact packets on
1693 * @rx_buffer: buffer containing page to add
1694 * @skb: sk_buff to place the data into
1695 * @size: size of buffer to be added
1696 *
1697 * This function will add the data contained in rx_buffer->page to the skb.
1698 */
1699static void igc_add_rx_frag(struct igc_ring *rx_ring,
1700                            struct igc_rx_buffer *rx_buffer,
1701                            struct sk_buff *skb,
1702                            unsigned int size)
1703{
1704        unsigned int truesize;
1705
1706#if (PAGE_SIZE < 8192)
1707        truesize = igc_rx_pg_size(rx_ring) / 2;
1708#else
1709        truesize = ring_uses_build_skb(rx_ring) ?
1710                   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1711                   SKB_DATA_ALIGN(size);
1712#endif
1713        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
1714                        rx_buffer->page_offset, size, truesize);
1715
1716        igc_rx_buffer_flip(rx_buffer, truesize);
1717}
1718
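/* Build an skb directly around the existing page buffer (no copy),
 * reserving IGC_SKB_PAD headroom and flipping the buffer for reuse.
 */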
1719static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
1720                                     struct igc_rx_buffer *rx_buffer,
1721                                     union igc_adv_rx_desc *rx_desc,
1722                                     unsigned int size)
1723{
1724        void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
1725        unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1726        struct sk_buff *skb;
1727
1728        /* prefetch first cache line of first page */
1729        net_prefetch(va);
1730
1731        /* build an skb around the page buffer */
1732        skb = build_skb(va - IGC_SKB_PAD, truesize);
1733        if (unlikely(!skb))
1734                return NULL;
1735
1736        /* update pointers within the skb to store the data */
1737        skb_reserve(skb, IGC_SKB_PAD);
1738        __skb_put(skb, size);
1739
1740        igc_rx_buffer_flip(rx_buffer, truesize);
1741        return skb;
1742}
1743
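/* Allocate a new skb and copy up to IGC_RX_HDR_LEN bytes of headers into
 * its linear area; any remaining payload is attached as a page fragment.
 */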
1744static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
1745                                         struct igc_rx_buffer *rx_buffer,
1746                                         struct xdp_buff *xdp,
1747                                         ktime_t timestamp)
1748{
1749        unsigned int size = xdp->data_end - xdp->data;
1750        unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1751        void *va = xdp->data;
1752        unsigned int headlen;
1753        struct sk_buff *skb;
1754
1755        /* prefetch first cache line of first page */
1756        net_prefetch(va);
1757
1758        /* allocate a skb to store the frags */
1759        skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN);
1760        if (unlikely(!skb))
1761                return NULL;
1762
1763        if (timestamp)
1764                skb_hwtstamps(skb)->hwtstamp = timestamp;
1765
1766        /* Determine how much of the packet to copy into the skb header */
1767        headlen = size;
1768        if (headlen > IGC_RX_HDR_LEN)
1769                headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
1770
1771        /* align pull length to size of long to optimize memcpy performance */
1772        memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
1773
1774        /* update all of the pointers */
1775        size -= headlen;
1776        if (size) {
1777                skb_add_rx_frag(skb, 0, rx_buffer->page,
1778                                (va + headlen) - page_address(rx_buffer->page),
1779                                size, truesize);
1780                igc_rx_buffer_flip(rx_buffer, truesize);
1781        } else {
1782                rx_buffer->pagecnt_bias++;
1783        }
1784
1785        return skb;
1786}
1787
1788/**
1789 * igc_reuse_rx_page - page flip buffer and store it back on the ring
1790 * @rx_ring: rx descriptor ring to store buffers on
1791 * @old_buff: donor buffer to have page reused
1792 *
1793 * Synchronizes page for reuse by the adapter
1794 */
1795static void igc_reuse_rx_page(struct igc_ring *rx_ring,
1796                              struct igc_rx_buffer *old_buff)
1797{
1798        u16 nta = rx_ring->next_to_alloc;
1799        struct igc_rx_buffer *new_buff;
1800
1801        new_buff = &rx_ring->rx_buffer_info[nta];
1802
1803        /* update, and store next to alloc */
1804        nta++;
1805        rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
1806
1807        /* Transfer page from old buffer to new buffer.
1808         * Move each member individually to avoid possible store
1809         * forwarding stalls.
1810         */
1811        new_buff->dma           = old_buff->dma;
1812        new_buff->page          = old_buff->page;
1813        new_buff->page_offset   = old_buff->page_offset;
1814        new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
1815}
1816
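/* Decide whether the page backing this Rx buffer can be recycled: it must
 * be reusable (not pfmemalloc/remote) and either fully owned by the driver
 * (small pages) or still have room for another buffer (large pages).
 */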
1817static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer,
1818                                  int rx_buffer_pgcnt)
1819{
1820        unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
1821        struct page *page = rx_buffer->page;
1822
1823        /* avoid re-using remote and pfmemalloc pages */
1824        if (!dev_page_is_reusable(page))
1825                return false;
1826
1827#if (PAGE_SIZE < 8192)
1828        /* if we are the only owner of the page we can reuse it */
1829        if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
1830                return false;
1831#else
1832#define IGC_LAST_OFFSET \
1833        (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
1834
1835        if (rx_buffer->page_offset > IGC_LAST_OFFSET)
1836                return false;
1837#endif
1838
1839        /* If we have drained the page fragment pool we need to update
1840         * the pagecnt_bias and page count so that we fully restock the
1841         * number of references the driver holds.
1842         */
1843        if (unlikely(pagecnt_bias == 1)) {
1844                page_ref_add(page, USHRT_MAX - 1);
1845                rx_buffer->pagecnt_bias = USHRT_MAX;
1846        }
1847
1848        return true;
1849}
1850
1851/**
1852 * igc_is_non_eop - process handling of non-EOP buffers
1853 * @rx_ring: Rx ring being processed
1854 * @rx_desc: Rx descriptor for current buffer
1855 *
1856 * This function updates next to clean.  If the buffer is an EOP buffer
1857 * this function exits returning false, otherwise it will place the
1858 * sk_buff in the next buffer to be chained and return true indicating
1859 * that this is in fact a non-EOP buffer.
1860 */
1861static bool igc_is_non_eop(struct igc_ring *rx_ring,
1862                           union igc_adv_rx_desc *rx_desc)
1863{
1864        u32 ntc = rx_ring->next_to_clean + 1;
1865
1866        /* fetch, update, and store next to clean */
1867        ntc = (ntc < rx_ring->count) ? ntc : 0;
1868        rx_ring->next_to_clean = ntc;
1869
1870        prefetch(IGC_RX_DESC(rx_ring, ntc));
1871
1872        if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
1873                return false;
1874
1875        return true;
1876}
1877
1878/**
1879 * igc_cleanup_headers - Correct corrupted or empty headers
1880 * @rx_ring: rx descriptor ring packet is being transacted on
1881 * @rx_desc: pointer to the EOP Rx descriptor
1882 * @skb: pointer to current skb being fixed
1883 *
1884 * Address the case where we are pulling data in on pages only
1885 * and as such no data is present in the skb header.
1886 *
1887 * In addition if skb is not at least 60 bytes we need to pad it so that
1888 * it is large enough to qualify as a valid Ethernet frame.
1889 *
1890 * Returns true if an error was encountered and skb was freed.
1891 */
1892static bool igc_cleanup_headers(struct igc_ring *rx_ring,
1893                                union igc_adv_rx_desc *rx_desc,
1894                                struct sk_buff *skb)
1895{
1896        /* XDP packets use error pointer so abort at this point */
1897        if (IS_ERR(skb))
1898                return true;
1899
1900        if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
1901                struct net_device *netdev = rx_ring->netdev;
1902
1903                if (!(netdev->features & NETIF_F_RXALL)) {
1904                        dev_kfree_skb_any(skb);
1905                        return true;
1906                }
1907        }
1908
1909        /* if eth_skb_pad returns an error the skb was freed */
1910        if (eth_skb_pad(skb))
1911                return true;
1912
1913        return false;
1914}
1915
1916static void igc_put_rx_buffer(struct igc_ring *rx_ring,
1917                              struct igc_rx_buffer *rx_buffer,
1918                              int rx_buffer_pgcnt)
1919{
1920        if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
1921                /* hand second half of page back to the ring */
1922                igc_reuse_rx_page(rx_ring, rx_buffer);
1923        } else {
1924                /* We are not reusing the buffer so unmap it and free
1925                 * any references we are holding to it
1926                 */
1927                dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
1928                                     igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
1929                                     IGC_RX_DMA_ATTR);
1930                __page_frag_cache_drain(rx_buffer->page,
1931                                        rx_buffer->pagecnt_bias);
1932        }
1933
1934        /* clear contents of rx_buffer */
1935        rx_buffer->page = NULL;
1936}
1937
1938static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
1939{
1940        struct igc_adapter *adapter = rx_ring->q_vector->adapter;
1941
1942        if (ring_uses_build_skb(rx_ring))
1943                return IGC_SKB_PAD;
1944        if (igc_xdp_is_enabled(adapter))
1945                return XDP_PACKET_HEADROOM;
1946
1947        return 0;
1948}
1949
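/* Allocate and DMA-map a fresh page for an Rx buffer if it does not already
 * have one; on failure, alloc_failed is counted and false is returned.
 */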
1950static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
1951                                  struct igc_rx_buffer *bi)
1952{
1953        struct page *page = bi->page;
1954        dma_addr_t dma;
1955
1956        /* since we are recycling buffers we should seldom need to alloc */
1957        if (likely(page))
1958                return true;
1959
1960        /* alloc new page for storage */
1961        page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
1962        if (unlikely(!page)) {
1963                rx_ring->rx_stats.alloc_failed++;
1964                return false;
1965        }
1966
1967        /* map page for use */
1968        dma = dma_map_page_attrs(rx_ring->dev, page, 0,
1969                                 igc_rx_pg_size(rx_ring),
1970                                 DMA_FROM_DEVICE,
1971                                 IGC_RX_DMA_ATTR);
1972
1973        /* if mapping failed free memory back to system since
1974         * there isn't much point in holding memory we can't use
1975         */
1976        if (dma_mapping_error(rx_ring->dev, dma)) {
1977                __free_page(page);
1978
1979                rx_ring->rx_stats.alloc_failed++;
1980                return false;
1981        }
1982
1983        bi->dma = dma;
1984        bi->page = page;
1985        bi->page_offset = igc_rx_offset(rx_ring);
1986        page_ref_add(page, USHRT_MAX - 1);
1987        bi->pagecnt_bias = USHRT_MAX;
1988
1989        return true;
1990}
1991
1992/**
1993 * igc_alloc_rx_buffers - Replace used receive buffers; packet split
1994 * @rx_ring: rx descriptor ring
1995 * @cleaned_count: number of buffers to clean
1996 */
1997static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
1998{
1999        union igc_adv_rx_desc *rx_desc;
2000        u16 i = rx_ring->next_to_use;
2001        struct igc_rx_buffer *bi;
2002        u16 bufsz;
2003
2004        /* nothing to do */
2005        if (!cleaned_count)
2006                return;
2007
2008        rx_desc = IGC_RX_DESC(rx_ring, i);
2009        bi = &rx_ring->rx_buffer_info[i];
2010        i -= rx_ring->count;
2011
2012        bufsz = igc_rx_bufsz(rx_ring);
2013
2014        do {
2015                if (!igc_alloc_mapped_page(rx_ring, bi))
2016                        break;
2017
2018                /* sync the buffer for use by the device */
2019                dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
2020                                                 bi->page_offset, bufsz,
2021                                                 DMA_FROM_DEVICE);
2022
2023                /* Refresh the desc even if buffer_addrs didn't change
2024                 * because each write-back erases this info.
2025                 */
2026                rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
2027
2028                rx_desc++;
2029                bi++;
2030                i++;
2031                if (unlikely(!i)) {
2032                        rx_desc = IGC_RX_DESC(rx_ring, 0);
2033                        bi = rx_ring->rx_buffer_info;
2034                        i -= rx_ring->count;
2035                }
2036
2037                /* clear the length for the next_to_use descriptor */
2038                rx_desc->wb.upper.length = 0;
2039
2040                cleaned_count--;
2041        } while (cleaned_count);
2042
2043        i += rx_ring->count;
2044
2045        if (rx_ring->next_to_use != i) {
2046                /* record the next descriptor to use */
2047                rx_ring->next_to_use = i;
2048
2049                /* update next to alloc since we have filled the ring */
2050                rx_ring->next_to_alloc = i;
2051
2052                /* Force memory writes to complete before letting h/w
2053                 * know there are new descriptors to fetch.  (Only
2054                 * applicable for weak-ordered memory model archs,
2055                 * such as IA-64).
2056                 */
2057                wmb();
2058                writel(i, rx_ring->tail);
2059        }
2060}
2061
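/* AF_XDP zero-copy counterpart of igc_alloc_rx_buffers(): pull buffers from
 * the XSK pool, write their DMA addresses into the descriptors and bump the
 * tail. Returns false if the pool ran out of buffers.
 */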
2062static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
2063{
2064        union igc_adv_rx_desc *desc;
2065        u16 i = ring->next_to_use;
2066        struct igc_rx_buffer *bi;
2067        dma_addr_t dma;
2068        bool ok = true;
2069
2070        if (!count)
2071                return ok;
2072
2073        desc = IGC_RX_DESC(ring, i);
2074        bi = &ring->rx_buffer_info[i];
2075        i -= ring->count;
2076
2077        do {
2078                bi->xdp = xsk_buff_alloc(ring->xsk_pool);
2079                if (!bi->xdp) {
2080                        ok = false;
2081                        break;
2082                }
2083
2084                dma = xsk_buff_xdp_get_dma(bi->xdp);
2085                desc->read.pkt_addr = cpu_to_le64(dma);
2086
2087                desc++;
2088                bi++;
2089                i++;
2090                if (unlikely(!i)) {
2091                        desc = IGC_RX_DESC(ring, 0);
2092                        bi = ring->rx_buffer_info;
2093                        i -= ring->count;
2094                }
2095
2096                /* Clear the length for the next_to_use descriptor. */
2097                desc->wb.upper.length = 0;
2098
2099                count--;
2100        } while (count);
2101
2102        i += ring->count;
2103
2104        if (ring->next_to_use != i) {
2105                ring->next_to_use = i;
2106
2107                /* Force memory writes to complete before letting h/w
2108                 * know there are new descriptors to fetch.  (Only
2109                 * applicable for weak-ordered memory model archs,
2110                 * such as IA-64).
2111                 */
2112                wmb();
2113                writel(i, ring->tail);
2114        }
2115
2116        return ok;
2117}
2118
2119static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
2120                                  struct xdp_frame *xdpf,
2121                                  struct igc_ring *ring)
2122{
2123        dma_addr_t dma;
2124
2125        dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
2126        if (dma_mapping_error(ring->dev, dma)) {
2127                netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
2128                return -ENOMEM;
2129        }
2130
2131        buffer->type = IGC_TX_BUFFER_TYPE_XDP;
2132        buffer->xdpf = xdpf;
2133        buffer->protocol = 0;
2134        buffer->bytecount = xdpf->len;
2135        buffer->gso_segs = 1;
2136        buffer->time_stamp = jiffies;
2137        dma_unmap_len_set(buffer, len, xdpf->len);
2138        dma_unmap_addr_set(buffer, dma, dma);
2139        return 0;
2140}
2141
2142/* This function requires __netif_tx_lock is held by the caller. */
2143static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
2144                                      struct xdp_frame *xdpf)
2145{
2146        struct igc_tx_buffer *buffer;
2147        union igc_adv_tx_desc *desc;
2148        u32 cmd_type, olinfo_status;
2149        int err;
2150
2151        if (!igc_desc_unused(ring))
2152                return -EBUSY;
2153
2154        buffer = &ring->tx_buffer_info[ring->next_to_use];
2155        err = igc_xdp_init_tx_buffer(buffer, xdpf, ring);
2156        if (err)
2157                return err;
2158
2159        cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2160                   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2161                   buffer->bytecount;
2162        olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
2163
2164        desc = IGC_TX_DESC(ring, ring->next_to_use);
2165        desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2166        desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2167        desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
2168
2169        netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount);
2170
2171        buffer->next_to_watch = desc;
2172
2173        ring->next_to_use++;
2174        if (ring->next_to_use == ring->count)
2175                ring->next_to_use = 0;
2176
2177        return 0;
2178}
2179
2180static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
2181                                            int cpu)
2182{
2183        int index = cpu;
2184
2185        if (unlikely(index < 0))
2186                index = 0;
2187
2188        while (index >= adapter->num_tx_queues)
2189                index -= adapter->num_tx_queues;
2190
2191        return adapter->tx_ring[index];
2192}
2193
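/* Transmit an XDP_TX packet: convert the xdp_buff to an xdp_frame and queue
 * it on the Tx ring selected for the current CPU, under the Tx queue lock.
 */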
2194static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
2195{
2196        struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
2197        int cpu = smp_processor_id();
2198        struct netdev_queue *nq;
2199        struct igc_ring *ring;
2200        int res;
2201
2202        if (unlikely(!xdpf))
2203                return -EFAULT;
2204
2205        ring = igc_xdp_get_tx_ring(adapter, cpu);
2206        nq = txring_txq(ring);
2207
2208        __netif_tx_lock(nq, cpu);
2209        res = igc_xdp_init_tx_descriptor(ring, xdpf);
2210        __netif_tx_unlock(nq);
2211        return res;
2212}
2213
2214/* This function assumes rcu_read_lock() is held by the caller. */
2215static int __igc_xdp_run_prog(struct igc_adapter *adapter,
2216                              struct bpf_prog *prog,
2217                              struct xdp_buff *xdp)
2218{
2219        u32 act = bpf_prog_run_xdp(prog, xdp);
2220
2221        switch (act) {
2222        case XDP_PASS:
2223                return IGC_XDP_PASS;
2224        case XDP_TX:
2225                if (igc_xdp_xmit_back(adapter, xdp) < 0)
2226                        goto out_failure;
2227                return IGC_XDP_TX;
2228        case XDP_REDIRECT:
2229                if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
2230                        goto out_failure;
2231                return IGC_XDP_REDIRECT;
2232                break;
2233        default:
2234                bpf_warn_invalid_xdp_action(act);
2235                fallthrough;
2236        case XDP_ABORTED:
2237out_failure:
2238                trace_xdp_exception(adapter->netdev, prog, act);
2239                fallthrough;
2240        case XDP_DROP:
2241                return IGC_XDP_CONSUMED;
2242        }
2243}
2244
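/* Run the attached XDP program, if any. The IGC_XDP_* verdict is encoded as
 * ERR_PTR(-res), so the caller recovers it with -PTR_ERR(); IGC_XDP_PASS (0)
 * comes back as NULL and the frame is handed to the stack.
 */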
2245static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
2246                                        struct xdp_buff *xdp)
2247{
2248        struct bpf_prog *prog;
2249        int res;
2250
2251        prog = READ_ONCE(adapter->xdp_prog);
2252        if (!prog) {
2253                res = IGC_XDP_PASS;
2254                goto out;
2255        }
2256
2257        res = __igc_xdp_run_prog(adapter, prog, xdp);
2258
2259out:
2260        return ERR_PTR(-res);
2261}
2262
2263/* This function assumes __netif_tx_lock is held by the caller. */
2264static void igc_flush_tx_descriptors(struct igc_ring *ring)
2265{
2266        /* Once the tail pointer is updated, hardware can fetch the
2267         * descriptors at any time, so issue a memory barrier here to ensure
2268         * all descriptor writes are complete before the tail is updated.
2269         */
2270        wmb();
2271        writel(ring->next_to_use, ring->tail);
2272}
2273
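/* Finish XDP work accumulated during the Rx loop: kick the Tx tail for any
 * XDP_TX frames and flush outstanding redirects via xdp_do_flush().
 */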
2274static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
2275{
2276        int cpu = smp_processor_id();
2277        struct netdev_queue *nq;
2278        struct igc_ring *ring;
2279
2280        if (status & IGC_XDP_TX) {
2281                ring = igc_xdp_get_tx_ring(adapter, cpu);
2282                nq = txring_txq(ring);
2283
2284                __netif_tx_lock(nq, cpu);
2285                igc_flush_tx_descriptors(ring);
2286                __netif_tx_unlock(nq);
2287        }
2288
2289        if (status & IGC_XDP_REDIRECT)
2290                xdp_do_flush();
2291}
2292
2293static void igc_update_rx_stats(struct igc_q_vector *q_vector,
2294                                unsigned int packets, unsigned int bytes)
2295{
2296        struct igc_ring *ring = q_vector->rx.ring;
2297
2298        u64_stats_update_begin(&ring->rx_syncp);
2299        ring->rx_stats.packets += packets;
2300        ring->rx_stats.bytes += bytes;
2301        u64_stats_update_end(&ring->rx_syncp);
2302
2303        q_vector->rx.total_packets += packets;
2304        q_vector->rx.total_bytes += bytes;
2305}
2306
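/* Main Rx poll routine: refill descriptors, run the XDP program on each
 * frame, build skbs for frames that pass and hand them to the stack via
 * GRO. Returns the number of packets processed (bounded by budget).
 */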
2307static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
2308{
2309        unsigned int total_bytes = 0, total_packets = 0;
2310        struct igc_adapter *adapter = q_vector->adapter;
2311        struct igc_ring *rx_ring = q_vector->rx.ring;
2312        struct sk_buff *skb = rx_ring->skb;
2313        u16 cleaned_count = igc_desc_unused(rx_ring);
2314        int xdp_status = 0, rx_buffer_pgcnt;
2315
2316        while (likely(total_packets < budget)) {
2317                union igc_adv_rx_desc *rx_desc;
2318                struct igc_rx_buffer *rx_buffer;
2319                unsigned int size, truesize;
2320                ktime_t timestamp = 0;
2321                struct xdp_buff xdp;
2322                int pkt_offset = 0;
2323                void *pktbuf;
2324
2325                /* return some buffers to hardware, one at a time is too slow */
2326                if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
2327                        igc_alloc_rx_buffers(rx_ring, cleaned_count);
2328                        cleaned_count = 0;
2329                }
2330
2331                rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
2332                size = le16_to_cpu(rx_desc->wb.upper.length);
2333                if (!size)
2334                        break;
2335
2336                /* This memory barrier is needed to keep us from reading
2337                 * any other fields out of the rx_desc until we know the
2338                 * descriptor has been written back
2339                 */
2340                dma_rmb();
2341
2342                rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
2343                truesize = igc_get_rx_frame_truesize(rx_ring, size);
2344
2345                pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
2346
2347                if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
2348                        timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2349                                                        pktbuf);
2350                        pkt_offset = IGC_TS_HDR_LEN;
2351                        size -= IGC_TS_HDR_LEN;
2352                }
2353
2354                if (!skb) {
2355                        xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
2356                        xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
2357                                         igc_rx_offset(rx_ring) + pkt_offset, size, false);
2358
2359                        skb = igc_xdp_run_prog(adapter, &xdp);
2360                }
2361
2362                if (IS_ERR(skb)) {
2363                        unsigned int xdp_res = -PTR_ERR(skb);
2364
2365                        switch (xdp_res) {
2366                        case IGC_XDP_CONSUMED:
2367                                rx_buffer->pagecnt_bias++;
2368                                break;
2369                        case IGC_XDP_TX:
2370                        case IGC_XDP_REDIRECT:
2371                                igc_rx_buffer_flip(rx_buffer, truesize);
2372                                xdp_status |= xdp_res;
2373                                break;
2374                        }
2375
2376                        total_packets++;
2377                        total_bytes += size;
2378                } else if (skb)
2379                        igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
2380                else if (ring_uses_build_skb(rx_ring))
2381                        skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size);
2382                else
2383                        skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
2384                                                timestamp);
2385
2386                /* exit if we failed to retrieve a buffer */
2387                if (!skb) {
2388                        rx_ring->rx_stats.alloc_failed++;
2389                        rx_buffer->pagecnt_bias++;
2390                        break;
2391                }
2392
2393                igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
2394                cleaned_count++;
2395
2396                /* fetch next buffer in frame if non-eop */
2397                if (igc_is_non_eop(rx_ring, rx_desc))
2398                        continue;
2399
2400                /* verify the packet layout is correct */
2401                if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
2402                        skb = NULL;
2403                        continue;
2404                }
2405
2406                /* probably a little skewed due to removing CRC */
2407                total_bytes += skb->len;
2408
2409                /* populate checksum, VLAN, and protocol */
2410                igc_process_skb_fields(rx_ring, rx_desc, skb);
2411
2412                napi_gro_receive(&q_vector->napi, skb);
2413
2414                /* reset skb pointer */
2415                skb = NULL;
2416
2417                /* update budget accounting */
2418                total_packets++;
2419        }
2420
2421        if (xdp_status)
2422                igc_finalize_xdp(adapter, xdp_status);
2423
2424        /* place incomplete frames back on ring for completion */
2425        rx_ring->skb = skb;
2426
2427        igc_update_rx_stats(q_vector, total_packets, total_bytes);
2428
2429        if (cleaned_count)
2430                igc_alloc_rx_buffers(rx_ring, cleaned_count);
2431
2432        return total_packets;
2433}
2434
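/* Copy the frame (including any XDP metadata) out of the zero-copy XSK
 * buffer into a newly allocated skb, since the XSK buffer itself must be
 * returned to the pool.
 */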
2435static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
2436                                            struct xdp_buff *xdp)
2437{
2438        unsigned int metasize = xdp->data - xdp->data_meta;
2439        unsigned int datasize = xdp->data_end - xdp->data;
2440        unsigned int totalsize = metasize + datasize;
2441        struct sk_buff *skb;
2442
2443        skb = __napi_alloc_skb(&ring->q_vector->napi,
2444                               xdp->data_end - xdp->data_hard_start,
2445                               GFP_ATOMIC | __GFP_NOWARN);
2446        if (unlikely(!skb))
2447                return NULL;
2448
2449        skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
2450        memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize);
2451        if (metasize)
2452                skb_metadata_set(skb, metasize);
2453
2454        return skb;
2455}
2456
2457static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
2458                                union igc_adv_rx_desc *desc,
2459                                struct xdp_buff *xdp,
2460                                ktime_t timestamp)
2461{
2462        struct igc_ring *ring = q_vector->rx.ring;
2463        struct sk_buff *skb;
2464
2465        skb = igc_construct_skb_zc(ring, xdp);
2466        if (!skb) {
2467                ring->rx_stats.alloc_failed++;
2468                return;
2469        }
2470
2471        if (timestamp)
2472                skb_hwtstamps(skb)->hwtstamp = timestamp;
2473
2474        if (igc_cleanup_headers(ring, desc, skb))
2475                return;
2476
2477        igc_process_skb_fields(ring, desc, skb);
2478        napi_gro_receive(&q_vector->napi, skb);
2479}
2480
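/* AF_XDP zero-copy Rx poll loop: run the XDP program on each XSK buffer and
 * either copy the frame into an skb (XDP_PASS), drop it, or hand it off for
 * TX/redirect; those completions are flushed in igc_finalize_xdp().
 */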
2481static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
2482{
2483        struct igc_adapter *adapter = q_vector->adapter;
2484        struct igc_ring *ring = q_vector->rx.ring;
2485        u16 cleaned_count = igc_desc_unused(ring);
2486        int total_bytes = 0, total_packets = 0;
2487        u16 ntc = ring->next_to_clean;
2488        struct bpf_prog *prog;
2489        bool failure = false;
2490        int xdp_status = 0;
2491
2492        rcu_read_lock();
2493
2494        prog = READ_ONCE(adapter->xdp_prog);
2495
2496        while (likely(total_packets < budget)) {
2497                union igc_adv_rx_desc *desc;
2498                struct igc_rx_buffer *bi;
2499                ktime_t timestamp = 0;
2500                unsigned int size;
2501                int res;
2502
2503                desc = IGC_RX_DESC(ring, ntc);
2504                size = le16_to_cpu(desc->wb.upper.length);
2505                if (!size)
2506                        break;
2507
2508                /* This memory barrier is needed to keep us from reading
2509                 * any other fields out of the rx_desc until we know the
2510                 * descriptor has been written back
2511                 */
2512                dma_rmb();
2513
2514                bi = &ring->rx_buffer_info[ntc];
2515
2516                if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
2517                        timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2518                                                        bi->xdp->data);
2519
2520                        bi->xdp->data += IGC_TS_HDR_LEN;
2521
2522                        /* The HW timestamp has been copied into a local variable, so
2523                         * the metadata length seen by the XDP program should be 0.
2524                         */
2525                        bi->xdp->data_meta += IGC_TS_HDR_LEN;
2526                        size -= IGC_TS_HDR_LEN;
2527                }
2528
2529                bi->xdp->data_end = bi->xdp->data + size;
2530                xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
2531
2532                res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
2533                switch (res) {
2534                case IGC_XDP_PASS:
2535                        igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
2536                        fallthrough;
2537                case IGC_XDP_CONSUMED:
2538                        xsk_buff_free(bi->xdp);
2539                        break;
2540                case IGC_XDP_TX:
2541                case IGC_XDP_REDIRECT:
2542                        xdp_status |= res;
2543                        break;
2544                }
2545
2546                bi->xdp = NULL;
2547                total_bytes += size;
2548                total_packets++;
2549                cleaned_count++;
2550                ntc++;
2551                if (ntc == ring->count)
2552                        ntc = 0;
2553        }
2554
2555        ring->next_to_clean = ntc;
2556        rcu_read_unlock();
2557
2558        if (cleaned_count >= IGC_RX_BUFFER_WRITE)
2559                failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count);
2560
2561        if (xdp_status)
2562                igc_finalize_xdp(adapter, xdp_status);
2563
2564        igc_update_rx_stats(q_vector, total_packets, total_bytes);
2565
2566        if (xsk_uses_need_wakeup(ring->xsk_pool)) {
2567                if (failure || ring->next_to_clean == ring->next_to_use)
2568                        xsk_set_rx_need_wakeup(ring->xsk_pool);
2569                else
2570                        xsk_clear_rx_need_wakeup(ring->xsk_pool);
2571                return total_packets;
2572        }
2573
2574        return failure ? budget : total_packets;
2575}
2576
2577static void igc_update_tx_stats(struct igc_q_vector *q_vector,
2578                                unsigned int packets, unsigned int bytes)
2579{
2580        struct igc_ring *ring = q_vector->tx.ring;
2581
2582        u64_stats_update_begin(&ring->tx_syncp);
2583        ring->tx_stats.bytes += bytes;
2584        ring->tx_stats.packets += packets;
2585        u64_stats_update_end(&ring->tx_syncp);
2586
2587        q_vector->tx.total_bytes += bytes;
2588        q_vector->tx.total_packets += packets;
2589}
2590
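/* Drain frames queued by user space on the AF_XDP Tx pool and post them to
 * the hardware ring under the Tx queue lock; the tail is written once after
 * the batch via igc_flush_tx_descriptors().
 */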
2591static void igc_xdp_xmit_zc(struct igc_ring *ring)
2592{
2593        struct xsk_buff_pool *pool = ring->xsk_pool;
2594        struct netdev_queue *nq = txring_txq(ring);
2595        union igc_adv_tx_desc *tx_desc = NULL;
2596        int cpu = smp_processor_id();
2597        u16 ntu = ring->next_to_use;
2598        struct xdp_desc xdp_desc;
2599        u16 budget;
2600
2601        if (!netif_carrier_ok(ring->netdev))
2602                return;
2603
2604        __netif_tx_lock(nq, cpu);
2605
2606        budget = igc_desc_unused(ring);
2607
2608        while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
2609                u32 cmd_type, olinfo_status;
2610                struct igc_tx_buffer *bi;
2611                dma_addr_t dma;
2612
2613                cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2614                           IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2615                           xdp_desc.len;
2616                olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT;
2617
2618                dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
2619                xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len);
2620
2621                tx_desc = IGC_TX_DESC(ring, ntu);
2622                tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2623                tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2624                tx_desc->read.buffer_addr = cpu_to_le64(dma);
2625
2626                bi = &ring->tx_buffer_info[ntu];
2627                bi->type = IGC_TX_BUFFER_TYPE_XSK;
2628                bi->protocol = 0;
2629                bi->bytecount = xdp_desc.len;
2630                bi->gso_segs = 1;
2631                bi->time_stamp = jiffies;
2632                bi->next_to_watch = tx_desc;
2633
2634                netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len);
2635
2636                ntu++;
2637                if (ntu == ring->count)
2638                        ntu = 0;
2639        }
2640
2641        ring->next_to_use = ntu;
2642        if (tx_desc) {
2643                igc_flush_tx_descriptors(ring);
2644                xsk_tx_release(pool);
2645        }
2646
2647        __netif_tx_unlock(nq);
2648}
2649
2650/**
2651 * igc_clean_tx_irq - Reclaim resources after transmit completes
2652 * @q_vector: pointer to q_vector containing needed info
2653 * @napi_budget: Used to determine if we are in netpoll
2654 *
2655 * returns true if ring is completely cleaned
2656 */
2657static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
2658{
2659        struct igc_adapter *adapter = q_vector->adapter;
2660        unsigned int total_bytes = 0, total_packets = 0;
2661        unsigned int budget = q_vector->tx.work_limit;
2662        struct igc_ring *tx_ring = q_vector->tx.ring;
2663        unsigned int i = tx_ring->next_to_clean;
2664        struct igc_tx_buffer *tx_buffer;
2665        union igc_adv_tx_desc *tx_desc;
2666        u32 xsk_frames = 0;
2667
2668        if (test_bit(__IGC_DOWN, &adapter->state))
2669                return true;
2670
2671        tx_buffer = &tx_ring->tx_buffer_info[i];
2672        tx_desc = IGC_TX_DESC(tx_ring, i);
2673        i -= tx_ring->count;
2674
2675        do {
2676                union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
2677
2678                /* if next_to_watch is not set then there is no work pending */
2679                if (!eop_desc)
2680                        break;
2681
2682                /* prevent any other reads prior to eop_desc */
2683                smp_rmb();
2684
2685                /* if DD is not set pending work has not been completed */
2686                if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
2687                        break;
2688
2689                /* clear next_to_watch to prevent false hangs */
2690                tx_buffer->next_to_watch = NULL;
2691
2692                /* update the statistics for this packet */
2693                total_bytes += tx_buffer->bytecount;
2694                total_packets += tx_buffer->gso_segs;
2695
2696                switch (tx_buffer->type) {
2697                case IGC_TX_BUFFER_TYPE_XSK:
2698                        xsk_frames++;
2699                        break;
2700                case IGC_TX_BUFFER_TYPE_XDP:
2701                        xdp_return_frame(tx_buffer->xdpf);
2702                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2703                        break;
2704                case IGC_TX_BUFFER_TYPE_SKB:
2705                        napi_consume_skb(tx_buffer->skb, napi_budget);
2706                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2707                        break;
2708                default:
2709                        netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
2710                        break;
2711                }
2712
2713                /* clear last DMA location and unmap remaining buffers */
2714                while (tx_desc != eop_desc) {
2715                        tx_buffer++;
2716                        tx_desc++;
2717                        i++;
2718                        if (unlikely(!i)) {
2719                                i -= tx_ring->count;
2720                                tx_buffer = tx_ring->tx_buffer_info;
2721                                tx_desc = IGC_TX_DESC(tx_ring, 0);
2722                        }
2723
2724                        /* unmap any remaining paged data */
2725                        if (dma_unmap_len(tx_buffer, len))
2726                                igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2727                }
2728
2729                /* move us one more past the eop_desc for start of next pkt */
2730                tx_buffer++;
2731                tx_desc++;
2732                i++;
2733                if (unlikely(!i)) {
2734                        i -= tx_ring->count;
2735                        tx_buffer = tx_ring->tx_buffer_info;
2736                        tx_desc = IGC_TX_DESC(tx_ring, 0);
2737                }
2738
2739                /* issue prefetch for next Tx descriptor */
2740                prefetch(tx_desc);
2741
2742                /* update budget accounting */
2743                budget--;
2744        } while (likely(budget));
2745
2746        netdev_tx_completed_queue(txring_txq(tx_ring),
2747                                  total_packets, total_bytes);
2748
2749        i += tx_ring->count;
2750        tx_ring->next_to_clean = i;
2751
2752        igc_update_tx_stats(q_vector, total_packets, total_bytes);
2753
2754        if (tx_ring->xsk_pool) {
2755                if (xsk_frames)
2756                        xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
2757                if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
2758                        xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
2759                igc_xdp_xmit_zc(tx_ring);
2760        }
2761
2762        if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
2763                struct igc_hw *hw = &adapter->hw;
2764
2765                /* Detect a transmit hang in hardware; this serializes the
2766                 * check with the clearing of time_stamp and the movement of i.
2767                 */
2768                clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
2769                if (tx_buffer->next_to_watch &&
2770                    time_after(jiffies, tx_buffer->time_stamp +
2771                    (adapter->tx_timeout_factor * HZ)) &&
2772                    !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
2773                        /* detected Tx unit hang */
2774                        netdev_err(tx_ring->netdev,
2775                                   "Detected Tx Unit Hang\n"
2776                                   "  Tx Queue             <%d>\n"
2777                                   "  TDH                  <%x>\n"
2778                                   "  TDT                  <%x>\n"
2779                                   "  next_to_use          <%x>\n"
2780                                   "  next_to_clean        <%x>\n"
2781                                   "buffer_info[next_to_clean]\n"
2782                                   "  time_stamp           <%lx>\n"
2783                                   "  next_to_watch        <%p>\n"
2784                                   "  jiffies              <%lx>\n"
2785                                   "  desc.status          <%x>\n",
2786                                   tx_ring->queue_index,
2787                                   rd32(IGC_TDH(tx_ring->reg_idx)),
2788                                   readl(tx_ring->tail),
2789                                   tx_ring->next_to_use,
2790                                   tx_ring->next_to_clean,
2791                                   tx_buffer->time_stamp,
2792                                   tx_buffer->next_to_watch,
2793                                   jiffies,
2794                                   tx_buffer->next_to_watch->wb.status);
2795                        netif_stop_subqueue(tx_ring->netdev,
2796                                            tx_ring->queue_index);
2797
2798                        /* we are about to reset, no point in re-enabling the queue */
2799                        return true;
2800                }
2801        }
2802
2803#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
2804        if (unlikely(total_packets &&
2805                     netif_carrier_ok(tx_ring->netdev) &&
2806                     igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
2807                /* Make sure that anybody stopping the queue after this
2808                 * sees the new next_to_clean.
2809                 */
2810                smp_mb();
2811                if (__netif_subqueue_stopped(tx_ring->netdev,
2812                                             tx_ring->queue_index) &&
2813                    !(test_bit(__IGC_DOWN, &adapter->state))) {
2814                        netif_wake_subqueue(tx_ring->netdev,
2815                                            tx_ring->queue_index);
2816
2817                        u64_stats_update_begin(&tx_ring->tx_syncp);
2818                        tx_ring->tx_stats.restart_queue++;
2819                        u64_stats_update_end(&tx_ring->tx_syncp);
2820                }
2821        }
2822
2823        return !!budget;
2824}
2825
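/* Scan the RAL/RAH filter registers for an enabled entry matching the given
 * address and filter type. Returns the entry index, or -1 if none matches.
 */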
2826static int igc_find_mac_filter(struct igc_adapter *adapter,
2827                               enum igc_mac_filter_type type, const u8 *addr)
2828{
2829        struct igc_hw *hw = &adapter->hw;
2830        int max_entries = hw->mac.rar_entry_count;
2831        u32 ral, rah;
2832        int i;
2833
2834        for (i = 0; i < max_entries; i++) {
2835                ral = rd32(IGC_RAL(i));
2836                rah = rd32(IGC_RAH(i));
2837
2838                if (!(rah & IGC_RAH_AV))
2839                        continue;
2840                if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type)
2841                        continue;
2842                if ((rah & IGC_RAH_RAH_MASK) !=
2843                    le16_to_cpup((__le16 *)(addr + 4)))
2844                        continue;
2845                if (ral != le32_to_cpup((__le32 *)(addr)))
2846                        continue;
2847
2848                return i;
2849        }
2850
2851        return -1;
2852}
2853
2854static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
2855{
2856        struct igc_hw *hw = &adapter->hw;
2857        int max_entries = hw->mac.rar_entry_count;
2858        u32 rah;
2859        int i;
2860
2861        for (i = 0; i < max_entries; i++) {
2862                rah = rd32(IGC_RAH(i));
2863
2864                if (!(rah & IGC_RAH_AV))
2865                        return i;
2866        }
2867
2868        return -1;
2869}
2870
2871/**
2872 * igc_add_mac_filter() - Add MAC address filter
2873 * @adapter: Pointer to adapter where the filter should be added
2874 * @type: MAC address filter type (source or destination)
2875 * @addr: MAC address
2876 * @queue: If non-negative, queue assignment feature is enabled and frames
2877 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
2878 *         assignment is disabled.
2879 *
2880 * Return: 0 in case of success, negative errno code otherwise.
2881 */
2882static int igc_add_mac_filter(struct igc_adapter *adapter,
2883                              enum igc_mac_filter_type type, const u8 *addr,
2884                              int queue)
2885{
2886        struct net_device *dev = adapter->netdev;
2887        int index;
2888
2889        index = igc_find_mac_filter(adapter, type, addr);
2890        if (index >= 0)
2891                goto update_filter;
2892
2893        index = igc_get_avail_mac_filter_slot(adapter);
2894        if (index < 0)
2895                return -ENOSPC;
2896
2897        netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n",
2898                   index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
2899                   addr, queue);
2900
2901update_filter:
2902        igc_set_mac_filter_hw(adapter, index, type, addr, queue);
2903        return 0;
2904}
2905
2906/**
2907 * igc_del_mac_filter() - Delete MAC address filter
2908 * @adapter: Pointer to adapter where the filter should be deleted from
2909 * @type: MAC address filter type (source or destination)
2910 * @addr: MAC address
2911 */
2912static void igc_del_mac_filter(struct igc_adapter *adapter,
2913                               enum igc_mac_filter_type type, const u8 *addr)
2914{
2915        struct net_device *dev = adapter->netdev;
2916        int index;
2917
2918        index = igc_find_mac_filter(adapter, type, addr);
2919        if (index < 0)
2920                return;
2921
2922        if (index == 0) {
2923                /* If this is the default filter, we don't actually delete it.
2924                 * We just reset to its default value i.e. disable queue
2925                 * assignment.
2926                 */
2927                netdev_dbg(dev, "Disable default MAC filter queue assignment");
2928
2929                igc_set_mac_filter_hw(adapter, 0, type, addr, -1);
2930        } else {
2931                netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n",
2932                           index,
2933                           type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
2934                           addr);
2935
2936                igc_clear_mac_filter_hw(adapter, index);
2937        }
2938}
2939
2940/**
2941 * igc_add_vlan_prio_filter() - Add VLAN priority filter
2942 * @adapter: Pointer to adapter where the filter should be added
2943 * @prio: VLAN priority value
2944 * @queue: Queue number which matching frames are assigned to
2945 *
2946 * Return: 0 in case of success, negative errno code otherwise.
2947 */
2948static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
2949                                    int queue)
2950{
2951        struct net_device *dev = adapter->netdev;
2952        struct igc_hw *hw = &adapter->hw;
2953        u32 vlanpqf;
2954
2955        vlanpqf = rd32(IGC_VLANPQF);
2956
2957        if (vlanpqf & IGC_VLANPQF_VALID(prio)) {
2958                netdev_dbg(dev, "VLAN priority filter already in use\n");
2959                return -EEXIST;
2960        }
2961
2962        vlanpqf |= IGC_VLANPQF_QSEL(prio, queue);
2963        vlanpqf |= IGC_VLANPQF_VALID(prio);
2964
2965        wr32(IGC_VLANPQF, vlanpqf);
2966
2967        netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n",
2968                   prio, queue);
2969        return 0;
2970}
2971
2972/**
2973 * igc_del_vlan_prio_filter() - Delete VLAN priority filter
2974 * @adapter: Pointer to adapter where the filter should be deleted from
2975 * @prio: VLAN priority value
2976 */
2977static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
2978{
2979        struct igc_hw *hw = &adapter->hw;
2980        u32 vlanpqf;
2981
2982        vlanpqf = rd32(IGC_VLANPQF);
2983
2984        vlanpqf &= ~IGC_VLANPQF_VALID(prio);
2985        vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK);
2986
2987        wr32(IGC_VLANPQF, vlanpqf);
2988
2989        netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n",
2990                   prio);
2991}
2992
2993static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter)
2994{
2995        struct igc_hw *hw = &adapter->hw;
2996        int i;
2997
2998        for (i = 0; i < MAX_ETYPE_FILTER; i++) {
2999                u32 etqf = rd32(IGC_ETQF(i));
3000
3001                if (!(etqf & IGC_ETQF_FILTER_ENABLE))
3002                        return i;
3003        }
3004
3005        return -1;
3006}
3007
3008/**
3009 * igc_add_etype_filter() - Add ethertype filter
3010 * @adapter: Pointer to adapter where the filter should be added
3011 * @etype: Ethertype value
3012 * @queue: If non-negative, queue assignment feature is enabled and frames
3013 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
3014 *         assignment is disabled.
3015 *
3016 * Return: 0 in case of success, negative errno code otherwise.
3017 */
3018static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype,
3019                                int queue)
3020{
3021        struct igc_hw *hw = &adapter->hw;
3022        int index;
3023        u32 etqf;
3024
3025        index = igc_get_avail_etype_filter_slot(adapter);
3026        if (index < 0)
3027                return -ENOSPC;
3028
3029        etqf = rd32(IGC_ETQF(index));
3030
3031        etqf &= ~IGC_ETQF_ETYPE_MASK;
3032        etqf |= etype;
3033
3034        if (queue >= 0) {
3035                etqf &= ~IGC_ETQF_QUEUE_MASK;
3036                etqf |= (queue << IGC_ETQF_QUEUE_SHIFT);
3037                etqf |= IGC_ETQF_QUEUE_ENABLE;
3038        }
3039
3040        etqf |= IGC_ETQF_FILTER_ENABLE;
3041
3042        wr32(IGC_ETQF(index), etqf);
3043
3044        netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n",
3045                   etype, queue);
3046        return 0;
3047}
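/* Usage sketch (illustrative; assumes IGC_ETQF_ETYPE_MASK covers the low
 * 16 bits of ETQF): steering gPTP traffic (ethertype 0x88F7) to queue 2
 * would program a free ETQF register roughly as
 *
 *	etqf = 0x88F7;
 *	etqf |= (2 << IGC_ETQF_QUEUE_SHIFT);
 *	etqf |= IGC_ETQF_QUEUE_ENABLE | IGC_ETQF_FILTER_ENABLE;
 *
 * Passing a negative queue keeps the filter enabled but leaves queue
 * assignment off, so matching frames follow the normal RSS path.
 */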
3048
3049static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
3050{
3051        struct igc_hw *hw = &adapter->hw;
3052        int i;
3053
3054        for (i = 0; i < MAX_ETYPE_FILTER; i++) {
3055                u32 etqf = rd32(IGC_ETQF(i));
3056
3057                if ((etqf & IGC_ETQF_ETYPE_MASK) == etype)
3058                        return i;
3059        }
3060
3061        return -1;
3062}
3063
3064/**
3065 * igc_del_etype_filter() - Delete ethertype filter
3066 * @adapter: Pointer to adapter where the filter should be deleted from
3067 * @etype: Ethertype value
3068 */
3069static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
3070{
3071        struct igc_hw *hw = &adapter->hw;
3072        int index;
3073
3074        index = igc_find_etype_filter(adapter, etype);
3075        if (index < 0)
3076                return;
3077
3078        wr32(IGC_ETQF(index), 0);
3079
3080        netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n",
3081                   etype);
3082}
3083
3084static int igc_flex_filter_select(struct igc_adapter *adapter,
3085                                  struct igc_flex_filter *input,
3086                                  u32 *fhft)
3087{
3088        struct igc_hw *hw = &adapter->hw;
3089        u8 fhft_index;
3090        u32 fhftsl;
3091
3092        if (input->index >= MAX_FLEX_FILTER) {
3093                dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n");
3094                return -EINVAL;
3095        }
3096
3097        /* Indirect table select register */
3098        fhftsl = rd32(IGC_FHFTSL);
3099        fhftsl &= ~IGC_FHFTSL_FTSL_MASK;
3100        switch (input->index) {
3101        case 0 ... 7:
3102                fhftsl |= 0x00;
3103                break;
3104        case 8 ... 15:
3105                fhftsl |= 0x01;
3106                break;
3107        case 16 ... 23:
3108                fhftsl |= 0x02;
3109                break;
3110        case 24 ... 31:
3111                fhftsl |= 0x03;
3112                break;
3113        }
3114        wr32(IGC_FHFTSL, fhftsl);
3115
3116        /* Normalize index down to host table register */
3117        fhft_index = input->index % 8;
3118
3119        *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) :
3120                IGC_FHFT_EXT(fhft_index - 4);
3121
3122        return 0;
3123}
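/* Worked example of the bank/register mapping above: flex filter index 19
 * selects bank 0x02 in FHFTSL (indices 16-23) and 19 % 8 = 3 addresses
 * IGC_FHFT(3) within that bank.  Index 21 normalizes to 21 % 8 = 5, which
 * is beyond the four low host table registers and therefore resolves to
 * IGC_FHFT_EXT(1).
 */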
3124
3125static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
3126                                    struct igc_flex_filter *input)
3127{
3128        struct device *dev = &adapter->pdev->dev;
3129        struct igc_hw *hw = &adapter->hw;
3130        u8 *data = input->data;
3131        u8 *mask = input->mask;
3132        u32 queuing;
3133        u32 fhft;
3134        u32 wufc;
3135        int ret;
3136        int i;
3137
3138        /* Length has to be aligned to 8. Otherwise the filter will fail. Bail
3139         * out early to avoid surprises later.
3140         */
3141        if (input->length % 8 != 0) {
3142                dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n");
3143                return -EINVAL;
3144        }
3145
3146        /* Select corresponding flex filter register and get base for host table. */
3147        ret = igc_flex_filter_select(adapter, input, &fhft);
3148        if (ret)
3149                return ret;
3150
3151        /* When adding a filter, globally disable the flex filter feature
3152         * first, as recommended by the datasheet.
3153         */
3154        wufc = rd32(IGC_WUFC);
3155        wufc &= ~IGC_WUFC_FLEX_HQ;
3156        wr32(IGC_WUFC, wufc);
3157
3158        /* Configure filter */
3159        queuing = input->length & IGC_FHFT_LENGTH_MASK;
3160        queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK;
3161        queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK;
3162
3163        if (input->immediate_irq)
3164                queuing |= IGC_FHFT_IMM_INT;
3165
3166        if (input->drop)
3167                queuing |= IGC_FHFT_DROP;
3168
3169        wr32(fhft + 0xFC, queuing);
3170
3171        /* Write data (128 bytes) and mask (128 bits) */
3172        for (i = 0; i < 16; ++i) {
3173                const size_t data_idx = i * 8;
3174                const size_t row_idx = i * 16;
3175                u32 dw0 =
3176                        (data[data_idx + 0] << 0) |
3177                        (data[data_idx + 1] << 8) |
3178                        (data[data_idx + 2] << 16) |
3179                        (data[data_idx + 3] << 24);
3180                u32 dw1 =
3181                        (data[data_idx + 4] << 0) |
3182                        (data[data_idx + 5] << 8) |
3183                        (data[data_idx + 6] << 16) |
3184                        (data[data_idx + 7] << 24);
3185                u32 tmp;
3186
3187                /* Write row: dw0, dw1 and mask */
3188                wr32(fhft + row_idx, dw0);
3189                wr32(fhft + row_idx + 4, dw1);
3190
3191                /* mask is only valid for MASK(7, 0) */
3192                tmp = rd32(fhft + row_idx + 8);
3193                tmp &= ~GENMASK(7, 0);
3194                tmp |= mask[i];
3195                wr32(fhft + row_idx + 8, tmp);
3196        }
3197
3198        /* Enable filter. */
3199        wufc |= IGC_WUFC_FLEX_HQ;
3200        if (input->index >= 8) {
3201                /* Filters 0-7 are enabled via WUFC; filters 8-31 via WUFC_EXT. */
3202                u32 wufc_ext = rd32(IGC_WUFC_EXT);
3203
3204                wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8));
3205
3206                wr32(IGC_WUFC_EXT, wufc_ext);
3207        } else {
3208                wufc |= (IGC_WUFC_FLX0 << input->index);
3209        }
3210        wr32(IGC_WUFC, wufc);
3211
3212        dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n",
3213                input->index);
3214
3215        return 0;
3216}
3217
3218static void igc_flex_filter_add_field(struct igc_flex_filter *flex,
3219                                      const void *src, unsigned int offset,
3220                                      size_t len, const void *mask)
3221{
3222        int i;
3223
3224        /* data */
3225        memcpy(&flex->data[offset], src, len);
3226
3227        /* mask */
3228        for (i = 0; i < len; ++i) {
3229                const unsigned int idx = i + offset;
3230                const u8 *ptr = mask;
3231
3232                if (mask) {
3233                        if (ptr[i] & 0xff)
3234                                flex->mask[idx / 8] |= BIT(idx % 8);
3235
3236                        continue;
3237                }
3238
3239                flex->mask[idx / 8] |= BIT(idx % 8);
3240        }
3241}
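/* Illustration of the layout produced above: the pattern is a flat
 * 128-byte array and the mask holds one bit per pattern byte.  Adding a
 * 2-byte field at offset 10 with a NULL mask results in
 *
 *	flex->data[10] = src[0];
 *	flex->data[11] = src[1];
 *	flex->mask[1] |= BIT(2) | BIT(3);	<- bits for bytes 10 and 11
 *
 * With a non-NULL mask, only bytes whose mask byte is non-zero get their
 * corresponding mask bit set.
 */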
3242
3243static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter)
3244{
3245        struct igc_hw *hw = &adapter->hw;
3246        u32 wufc, wufc_ext;
3247        int i;
3248
3249        wufc = rd32(IGC_WUFC);
3250        wufc_ext = rd32(IGC_WUFC_EXT);
3251
3252        for (i = 0; i < MAX_FLEX_FILTER; i++) {
3253                if (i < 8) {
3254                        if (!(wufc & (IGC_WUFC_FLX0 << i)))
3255                                return i;
3256                } else {
3257                        if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8))))
3258                                return i;
3259                }
3260        }
3261
3262        return -ENOSPC;
3263}
3264
3265static bool igc_flex_filter_in_use(struct igc_adapter *adapter)
3266{
3267        struct igc_hw *hw = &adapter->hw;
3268        u32 wufc, wufc_ext;
3269
3270        wufc = rd32(IGC_WUFC);
3271        wufc_ext = rd32(IGC_WUFC_EXT);
3272
3273        if (wufc & IGC_WUFC_FILTER_MASK)
3274                return true;
3275
3276        if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK)
3277                return true;
3278
3279        return false;
3280}
3281
3282static int igc_add_flex_filter(struct igc_adapter *adapter,
3283                               struct igc_nfc_rule *rule)
3284{
3285        struct igc_flex_filter flex = { };
3286        struct igc_nfc_filter *filter = &rule->filter;
3287        unsigned int eth_offset, user_offset;
3288        int ret, index;
3289        bool vlan;
3290
3291        index = igc_find_avail_flex_filter_slot(adapter);
3292        if (index < 0)
3293                return -ENOSPC;
3294
3295        /* Construct the flex filter:
3296         *  -> dest_mac [6]
3297         *  -> src_mac [6]
3298         *  -> tpid [2]
3299         *  -> vlan tci [2]
3300         *  -> ether type [2]
3301         *  -> user data [8]
3302         *  -> = 26 bytes => length 32 (next multiple of 8)
3303         */
3304        flex.index    = index;
3305        flex.length   = 32;
3306        flex.rx_queue = rule->action;
3307
3308        vlan = rule->filter.vlan_tci || rule->filter.vlan_etype;
3309        eth_offset = vlan ? 16 : 12;
3310        user_offset = vlan ? 18 : 14;
3311
3312        /* Add destination MAC  */
3313        if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3314                igc_flex_filter_add_field(&flex, &filter->dst_addr, 0,
3315                                          ETH_ALEN, NULL);
3316
3317        /* Add source MAC */
3318        if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3319                igc_flex_filter_add_field(&flex, &filter->src_addr, 6,
3320                                          ETH_ALEN, NULL);
3321
3322        /* Add VLAN etype */
3323        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE)
3324                igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12,
3325                                          sizeof(filter->vlan_etype),
3326                                          NULL);
3327
3328        /* Add VLAN TCI */
3329        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
3330                igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14,
3331                                          sizeof(filter->vlan_tci), NULL);
3332
3333        /* Add Ether type */
3334        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3335                __be16 etype = cpu_to_be16(filter->etype);
3336
3337                igc_flex_filter_add_field(&flex, &etype, eth_offset,
3338                                          sizeof(etype), NULL);
3339        }
3340
3341        /* Add user data */
3342        if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA)
3343                igc_flex_filter_add_field(&flex, &filter->user_data,
3344                                          user_offset,
3345                                          sizeof(filter->user_data),
3346                                          filter->user_mask);
3347
3348        /* Write it down to the hardware and enable it. */
3349        ret = igc_write_flex_filter_ll(adapter, &flex);
3350        if (ret)
3351                return ret;
3352
3353        filter->flex_index = index;
3354
3355        return 0;
3356}
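/* Resulting pattern layout for the rule above (illustrative): with a VLAN
 * tag the fields land at dest MAC [0..5], source MAC [6..11], TPID
 * [12..13], VLAN TCI [14..15], ethertype [16..17] and user data [18..25];
 * without a VLAN tag the ethertype moves to [12..13] and user data to
 * [14..21].  The used span is at most 26 bytes, which is why the filter
 * length is fixed at 32, the next multiple of 8 accepted by
 * igc_write_flex_filter_ll().
 */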
3357
3358static void igc_del_flex_filter(struct igc_adapter *adapter,
3359                                u16 reg_index)
3360{
3361        struct igc_hw *hw = &adapter->hw;
3362        u32 wufc;
3363
3364        /* Just disable the filter. The filter table itself is kept
3365         * intact; a subsequent igc_add_flex_filter() will simply overwrite
3366         * the old data.
3367         */
3368        if (reg_index >= 8) {
3369                u32 wufc_ext = rd32(IGC_WUFC_EXT);
3370
3371                wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8));
3372                wr32(IGC_WUFC_EXT, wufc_ext);
3373        } else {
3374                wufc = rd32(IGC_WUFC);
3375
3376                wufc &= ~(IGC_WUFC_FLX0 << reg_index);
3377                wr32(IGC_WUFC, wufc);
3378        }
3379
3380        if (igc_flex_filter_in_use(adapter))
3381                return;
3382
3383        /* No filters are in use; we may disable the flex filter feature */
3384        wufc = rd32(IGC_WUFC);
3385        wufc &= ~IGC_WUFC_FLEX_HQ;
3386        wr32(IGC_WUFC, wufc);
3387}
3388
3389static int igc_enable_nfc_rule(struct igc_adapter *adapter,
3390                               struct igc_nfc_rule *rule)
3391{
3392        int err;
3393
3394        if (rule->flex) {
3395                return igc_add_flex_filter(adapter, rule);
3396        }
3397
3398        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3399                err = igc_add_etype_filter(adapter, rule->filter.etype,
3400                                           rule->action);
3401                if (err)
3402                        return err;
3403        }
3404
3405        if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
3406                err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3407                                         rule->filter.src_addr, rule->action);
3408                if (err)
3409                        return err;
3410        }
3411
3412        if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
3413                err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3414                                         rule->filter.dst_addr, rule->action);
3415                if (err)
3416                        return err;
3417        }
3418
3419        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3420                int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3421                           VLAN_PRIO_SHIFT;
3422
3423                err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
3424                if (err)
3425                        return err;
3426        }
3427
3428        return 0;
3429}
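/* Example of the VLAN priority extraction above: a rule matching
 * TCI 0x6064 (PCP 3, VID 100) yields prio = (0x6064 & VLAN_PRIO_MASK) >>
 * VLAN_PRIO_SHIFT = 0x6000 >> 13 = 3, so priority 3 frames are steered to
 * rule->action via igc_add_vlan_prio_filter().
 */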
3430
3431static void igc_disable_nfc_rule(struct igc_adapter *adapter,
3432                                 const struct igc_nfc_rule *rule)
3433{
3434        if (rule->flex) {
3435                igc_del_flex_filter(adapter, rule->filter.flex_index);
3436                return;
3437        }
3438
3439        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
3440                igc_del_etype_filter(adapter, rule->filter.etype);
3441
3442        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3443                int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3444                           VLAN_PRIO_SHIFT;
3445
3446                igc_del_vlan_prio_filter(adapter, prio);
3447        }
3448
3449        if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3450                igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3451                                   rule->filter.src_addr);
3452
3453        if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3454                igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3455                                   rule->filter.dst_addr);
3456}
3457
3458/**
3459 * igc_get_nfc_rule() - Get NFC rule
3460 * @adapter: Pointer to adapter
3461 * @location: Rule location
3462 *
3463 * Context: Expects adapter->nfc_rule_lock to be held by caller.
3464 *
3465 * Return: Pointer to NFC rule at @location. If not found, NULL.
3466 */
3467struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
3468                                      u32 location)
3469{
3470        struct igc_nfc_rule *rule;
3471
3472        list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
3473                if (rule->location == location)
3474                        return rule;
3475                if (rule->location > location)
3476                        break;
3477        }
3478
3479        return NULL;
3480}
3481
3482/**
3483 * igc_del_nfc_rule() - Delete NFC rule
3484 * @adapter: Pointer to adapter
3485 * @rule: Pointer to rule to be deleted
3486 *
3487 * Disable NFC rule in hardware and delete it from adapter.
3488 *
3489 * Context: Expects adapter->nfc_rule_lock to be held by caller.
3490 */
3491void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3492{
3493        igc_disable_nfc_rule(adapter, rule);
3494
3495        list_del(&rule->list);
3496        adapter->nfc_rule_count--;
3497
3498        kfree(rule);
3499}
3500
3501static void igc_flush_nfc_rules(struct igc_adapter *adapter)
3502{
3503        struct igc_nfc_rule *rule, *tmp;
3504
3505        mutex_lock(&adapter->nfc_rule_lock);
3506
3507        list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list)
3508                igc_del_nfc_rule(adapter, rule);
3509
3510        mutex_unlock(&adapter->nfc_rule_lock);
3511}
3512
3513/**
3514 * igc_add_nfc_rule() - Add NFC rule
3515 * @adapter: Pointer to adapter
3516 * @rule: Pointer to rule to be added
3517 *
3518 * Enable NFC rule in hardware and add it to adapter.
3519 *
3520 * Context: Expects adapter->nfc_rule_lock to be held by caller.
3521 *
3522 * Return: 0 on success, negative errno on failure.
3523 */
3524int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3525{
3526        struct igc_nfc_rule *pred, *cur;
3527        int err;
3528
3529        err = igc_enable_nfc_rule(adapter, rule);
3530        if (err)
3531                return err;
3532
3533        pred = NULL;
3534        list_for_each_entry(cur, &adapter->nfc_rule_list, list) {
3535                if (cur->location >= rule->location)
3536                        break;
3537                pred = cur;
3538        }
3539
3540        list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list);
3541        adapter->nfc_rule_count++;
3542        return 0;
3543}
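/* Insertion order sketch: the list is kept sorted by rule->location.  If
 * it already holds rules at locations {2, 5, 9} and a rule with location 7
 * is added, the walk above stops at location 9, pred points at location 5,
 * and list_add() places the new rule between them, giving {2, 5, 7, 9}.
 */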
3544
3545static void igc_restore_nfc_rules(struct igc_adapter *adapter)
3546{
3547        struct igc_nfc_rule *rule;
3548
3549        mutex_lock(&adapter->nfc_rule_lock);
3550
3551        list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
3552                igc_enable_nfc_rule(adapter, rule);
3553
3554        mutex_unlock(&adapter->nfc_rule_lock);
3555}
3556
3557static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
3558{
3559        struct igc_adapter *adapter = netdev_priv(netdev);
3560
3561        return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1);
3562}
3563
3564static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
3565{
3566        struct igc_adapter *adapter = netdev_priv(netdev);
3567
3568        igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
3569        return 0;
3570}
3571
3572/**
3573 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3574 * @netdev: network interface device structure
3575 *
3576 * The set_rx_mode entry point is called whenever the unicast or multicast
3577 * address lists or the network interface flags are updated.  This routine is
3578 * responsible for configuring the hardware for proper unicast, multicast,
3579 * promiscuous mode, and all-multi behavior.
3580 */
3581static void igc_set_rx_mode(struct net_device *netdev)
3582{
3583        struct igc_adapter *adapter = netdev_priv(netdev);
3584        struct igc_hw *hw = &adapter->hw;
3585        u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
3586        int count;
3587
3588        /* Check for Promiscuous and All Multicast modes */
3589        if (netdev->flags & IFF_PROMISC) {
3590                rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
3591        } else {
3592                if (netdev->flags & IFF_ALLMULTI) {
3593                        rctl |= IGC_RCTL_MPE;
3594                } else {
3595                        /* Write addresses to the MTA; if the attempt fails,
3596                         * fall back to multicast promiscuous mode so that we
3597                         * can at least receive multicast traffic
3598                         */
3599                        count = igc_write_mc_addr_list(netdev);
3600                        if (count < 0)
3601                                rctl |= IGC_RCTL_MPE;
3602                }
3603        }
3604
3605        /* Write addresses to available RAR registers; if there is not
3606         * sufficient space to store all the addresses, enable unicast
3607         * promiscuous mode
3608         */
3609        if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
3610                rctl |= IGC_RCTL_UPE;
3611
3612        /* update state of unicast and multicast */
3613        rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
3614        wr32(IGC_RCTL, rctl);
3615
3616#if (PAGE_SIZE < 8192)
3617        if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
3618                rlpml = IGC_MAX_FRAME_BUILD_SKB;
3619#endif
3620        wr32(IGC_RLPML, rlpml);
3621}
3622
3623/**
3624 * igc_configure - configure the hardware for RX and TX
3625 * @adapter: private board structure
3626 */
3627static void igc_configure(struct igc_adapter *adapter)
3628{
3629        struct net_device *netdev = adapter->netdev;
3630        int i = 0;
3631
3632        igc_get_hw_control(adapter);
3633        igc_set_rx_mode(netdev);
3634
3635        igc_restore_vlan(adapter);
3636
3637        igc_setup_tctl(adapter);
3638        igc_setup_mrqc(adapter);
3639        igc_setup_rctl(adapter);
3640
3641        igc_set_default_mac_filter(adapter);
3642        igc_restore_nfc_rules(adapter);
3643
3644        igc_configure_tx(adapter);
3645        igc_configure_rx(adapter);
3646
3647        igc_rx_fifo_flush_base(&adapter->hw);
3648
3649        /* call igc_desc_unused which always leaves
3650         * at least 1 descriptor unused to make sure
3651         * next_to_use != next_to_clean
3652         */
3653        for (i = 0; i < adapter->num_rx_queues; i++) {
3654                struct igc_ring *ring = adapter->rx_ring[i];
3655
3656                if (ring->xsk_pool)
3657                        igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
3658                else
3659                        igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
3660        }
3661}
3662
3663/**
3664 * igc_write_ivar - configure ivar for given MSI-X vector
3665 * @hw: pointer to the HW structure
3666 * @msix_vector: vector number we are allocating to a given ring
3667 * @index: row index of IVAR register to write within IVAR table
3668 * @offset: column offset in IVAR, should be a multiple of 8
3669 *
3670 * The IVAR table consists of 2 columns,
3671 * each containing a cause allocation for an Rx and Tx ring, and a
3672 * variable number of rows depending on the number of queues supported.
3673 */
3674static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
3675                           int index, int offset)
3676{
3677        u32 ivar = array_rd32(IGC_IVAR0, index);
3678
3679        /* clear any bits that are currently set */
3680        ivar &= ~((u32)0xFF << offset);
3681
3682        /* write vector and valid bit */
3683        ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
3684
3685        array_wr32(IGC_IVAR0, index, ivar);
3686}
3687
3688static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
3689{
3690        struct igc_adapter *adapter = q_vector->adapter;
3691        struct igc_hw *hw = &adapter->hw;
3692        int rx_queue = IGC_N0_QUEUE;
3693        int tx_queue = IGC_N0_QUEUE;
3694
3695        if (q_vector->rx.ring)
3696                rx_queue = q_vector->rx.ring->reg_idx;
3697        if (q_vector->tx.ring)
3698                tx_queue = q_vector->tx.ring->reg_idx;
3699
3700        switch (hw->mac.type) {
3701        case igc_i225:
3702                if (rx_queue > IGC_N0_QUEUE)
3703                        igc_write_ivar(hw, msix_vector,
3704                                       rx_queue >> 1,
3705                                       (rx_queue & 0x1) << 4);
3706                if (tx_queue > IGC_N0_QUEUE)
3707                        igc_write_ivar(hw, msix_vector,
3708                                       tx_queue >> 1,
3709                                       ((tx_queue & 0x1) << 4) + 8);
3710                q_vector->eims_value = BIT(msix_vector);
3711                break;
3712        default:
3713                WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
3714                break;
3715        }
3716
3717        /* add q_vector eims value to global eims_enable_mask */
3718        adapter->eims_enable_mask |= q_vector->eims_value;
3719
3720        /* configure q_vector to set itr on first interrupt */
3721        q_vector->set_itr = 1;
3722}
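/* Worked IVAR example for the i225 mapping above: Rx queue 5 handled by
 * MSI-X vector 3 uses IVAR row 5 >> 1 = 2 and column offset
 * (5 & 0x1) << 4 = 16, so igc_write_ivar() writes (3 | IGC_IVAR_VALID)
 * into bits 23:16 of IVAR0[2].  The corresponding Tx queue 5 lands at
 * offset 16 + 8 = 24, i.e. bits 31:24 of the same register.
 */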
3723
3724/**
3725 * igc_configure_msix - Configure MSI-X hardware
3726 * @adapter: Pointer to adapter structure
3727 *
3728 * igc_configure_msix sets up the hardware to properly
3729 * generate MSI-X interrupts.
3730 */
3731static void igc_configure_msix(struct igc_adapter *adapter)
3732{
3733        struct igc_hw *hw = &adapter->hw;
3734        int i, vector = 0;
3735        u32 tmp;
3736
3737        adapter->eims_enable_mask = 0;
3738
3739        /* set vector for other causes, i.e. link changes */
3740        switch (hw->mac.type) {
3741        case igc_i225:
3742                /* Turn on MSI-X capability first, or our settings
3743                 * won't stick.  And it will take days to debug.
3744                 */
3745                wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
3746                     IGC_GPIE_PBA | IGC_GPIE_EIAME |
3747                     IGC_GPIE_NSICR);
3748
3749                /* enable msix_other interrupt */
3750                adapter->eims_other = BIT(vector);
3751                tmp = (vector++ | IGC_IVAR_VALID) << 8;
3752
3753                wr32(IGC_IVAR_MISC, tmp);
3754                break;
3755        default:
3756                /* do nothing, since nothing else supports MSI-X */
3757                break;
3758        } /* switch (hw->mac.type) */
3759
3760        adapter->eims_enable_mask |= adapter->eims_other;
3761
3762        for (i = 0; i < adapter->num_q_vectors; i++)
3763                igc_assign_vector(adapter->q_vector[i], vector++);
3764
3765        wrfl();
3766}
3767
3768/**
3769 * igc_irq_enable - Enable default interrupt generation settings
3770 * @adapter: board private structure
3771 */
3772static void igc_irq_enable(struct igc_adapter *adapter)
3773{
3774        struct igc_hw *hw = &adapter->hw;
3775
3776        if (adapter->msix_entries) {
3777                u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
3778                u32 regval = rd32(IGC_EIAC);
3779
3780                wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
3781                regval = rd32(IGC_EIAM);
3782                wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
3783                wr32(IGC_EIMS, adapter->eims_enable_mask);
3784                wr32(IGC_IMS, ims);
3785        } else {
3786                wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3787                wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3788        }
3789}
3790
3791/**
3792 * igc_irq_disable - Mask off interrupt generation on the NIC
3793 * @adapter: board private structure
3794 */
3795static void igc_irq_disable(struct igc_adapter *adapter)
3796{
3797        struct igc_hw *hw = &adapter->hw;
3798
3799        if (adapter->msix_entries) {
3800                u32 regval = rd32(IGC_EIAM);
3801
3802                wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
3803                wr32(IGC_EIMC, adapter->eims_enable_mask);
3804                regval = rd32(IGC_EIAC);
3805                wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
3806        }
3807
3808        wr32(IGC_IAM, 0);
3809        wr32(IGC_IMC, ~0);
3810        wrfl();
3811
3812        if (adapter->msix_entries) {
3813                int vector = 0, i;
3814
3815                synchronize_irq(adapter->msix_entries[vector++].vector);
3816
3817                for (i = 0; i < adapter->num_q_vectors; i++)
3818                        synchronize_irq(adapter->msix_entries[vector++].vector);
3819        } else {
3820                synchronize_irq(adapter->pdev->irq);
3821        }
3822}
3823
3824void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
3825                              const u32 max_rss_queues)
3826{
3827        /* Determine if we need to pair queues. */
3828        /* If rss_queues > half of max_rss_queues, pair the queues in
3829         * order to conserve interrupts due to limited supply.
3830         */
3831        if (adapter->rss_queues > (max_rss_queues / 2))
3832                adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
3833        else
3834                adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
3835}
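/* For example, with max_rss_queues = 4: rss_queues of 3 or 4 exceeds half
 * of the maximum, so IGC_FLAG_QUEUE_PAIRS is set and a Tx/Rx ring pair
 * shares one interrupt vector; rss_queues of 1 or 2 clears the flag and
 * Tx and Rx rings get separate vectors.
 */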
3836
3837unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
3838{
3839        return IGC_MAX_RX_QUEUES;
3840}
3841
3842static void igc_init_queue_configuration(struct igc_adapter *adapter)
3843{
3844        u32 max_rss_queues;
3845
3846        max_rss_queues = igc_get_max_rss_queues(adapter);
3847        adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
3848
3849        igc_set_flag_queue_pairs(adapter, max_rss_queues);
3850}
3851
3852/**
3853 * igc_reset_q_vector - Reset config for interrupt vector
3854 * @adapter: board private structure to initialize
3855 * @v_idx: Index of vector to be reset
3856 *
3857 * If NAPI is enabled it will delete any references to the
3858 * NAPI struct. This is preparation for igc_free_q_vector.
3859 */
3860static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
3861{
3862        struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
3863
3864        /* if we're coming from igc_set_interrupt_capability, the vectors are
3865         * not yet allocated
3866         */
3867        if (!q_vector)
3868                return;
3869
3870        if (q_vector->tx.ring)
3871                adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
3872
3873        if (q_vector->rx.ring)
3874                adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
3875
3876        netif_napi_del(&q_vector->napi);
3877}
3878
3879/**
3880 * igc_free_q_vector - Free memory allocated for specific interrupt vector
3881 * @adapter: board private structure to initialize
3882 * @v_idx: Index of vector to be freed
3883 *
3884 * This function frees the memory allocated to the q_vector.
3885 */
3886static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
3887{
3888        struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
3889
3890        adapter->q_vector[v_idx] = NULL;
3891
3892        /* igc_get_stats64() might access the rings on this vector, so we
3893         * must wait a grace period before freeing it.
3894         */
3895        if (q_vector)
3896                kfree_rcu(q_vector, rcu);
3897}
3898
3899/**
3900 * igc_free_q_vectors - Free memory allocated for interrupt vectors
3901 * @adapter: board private structure to initialize
3902 *
3903 * This function frees the memory allocated to the q_vectors.  In addition if
3904 * NAPI is enabled it will delete any references to the NAPI struct prior
3905 * to freeing the q_vector.
3906 */
3907static void igc_free_q_vectors(struct igc_adapter *adapter)
3908{
3909        int v_idx = adapter->num_q_vectors;
3910
3911        adapter->num_tx_queues = 0;
3912        adapter->num_rx_queues = 0;
3913        adapter->num_q_vectors = 0;
3914
3915        while (v_idx--) {
3916                igc_reset_q_vector(adapter, v_idx);
3917                igc_free_q_vector(adapter, v_idx);
3918        }
3919}
3920
3921/**
3922 * igc_update_itr - update the dynamic ITR value based on statistics
3923 * @q_vector: pointer to q_vector
3924 * @ring_container: ring info to update the itr for
3925 *
3926 * Stores a new ITR value based on packets and byte
3927 * counts during the last interrupt.  The advantage of per interrupt
3928 * computation is faster updates and more accurate ITR for the current
3929 * traffic pattern.  Constants in this function were computed
3930 * based on theoretical maximum wire speed and thresholds were set based
3931 * on testing data as well as attempting to minimize response time
3932 * while increasing bulk throughput.
3933 * NOTE: These calculations are only valid when operating in a single-
3934 * queue environment.
3935 */
3936static void igc_update_itr(struct igc_q_vector *q_vector,
3937                           struct igc_ring_container *ring_container)
3938{
3939        unsigned int packets = ring_container->total_packets;
3940        unsigned int bytes = ring_container->total_bytes;
3941        u8 itrval = ring_container->itr;
3942
3943        /* no packets, exit with status unchanged */
3944        if (packets == 0)
3945                return;
3946
3947        switch (itrval) {
3948        case lowest_latency:
3949                /* handle TSO and jumbo frames */
3950                if (bytes / packets > 8000)
3951                        itrval = bulk_latency;
3952                else if ((packets < 5) && (bytes > 512))
3953                        itrval = low_latency;
3954                break;
3955        case low_latency:  /* 50 usec aka 20000 ints/s */
3956                if (bytes > 10000) {
3957                        /* this if handles the TSO accounting */
3958                        if (bytes / packets > 8000)
3959                                itrval = bulk_latency;
3960                        else if ((packets < 10) || ((bytes / packets) > 1200))
3961                                itrval = bulk_latency;
3962                        else if ((packets > 35))
3963                                itrval = lowest_latency;
3964                } else if (bytes / packets > 2000) {
3965                        itrval = bulk_latency;
3966                } else if (packets <= 2 && bytes < 512) {
3967                        itrval = lowest_latency;
3968                }
3969                break;
3970        case bulk_latency: /* 250 usec aka 4000 ints/s */
3971                if (bytes > 25000) {
3972                        if (packets > 35)
3973                                itrval = low_latency;
3974                } else if (bytes < 1500) {
3975                        itrval = low_latency;
3976                }
3977                break;
3978        }
3979
3980        /* clear work counters since we have the values we need */
3981        ring_container->total_bytes = 0;
3982        ring_container->total_packets = 0;
3983
3984        /* write updated itr to ring container */
3985        ring_container->itr = itrval;
3986}
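/* Worked example of the classification above: a ring container currently
 * in low_latency that saw 20 packets / 30000 bytes since the last
 * interrupt has bytes > 10000 and bytes/packets = 1500 > 1200, so it is
 * demoted to bulk_latency.  The same container seeing 2 packets /
 * 400 bytes is promoted to lowest_latency instead.
 */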
3987
3988static void igc_set_itr(struct igc_q_vector *q_vector)
3989{
3990        struct igc_adapter *adapter = q_vector->adapter;
3991        u32 new_itr = q_vector->itr_val;
3992        u8 current_itr = 0;
3993
3994        /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3995        switch (adapter->link_speed) {
3996        case SPEED_10:
3997        case SPEED_100:
3998                current_itr = 0;
3999                new_itr = IGC_4K_ITR;
4000                goto set_itr_now;
4001        default:
4002                break;
4003        }
4004
4005        igc_update_itr(q_vector, &q_vector->tx);
4006        igc_update_itr(q_vector, &q_vector->rx);
4007
4008        current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4009
4010        /* conservative mode (itr 3) eliminates the lowest_latency setting */
4011        if (current_itr == lowest_latency &&
4012            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4013            (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4014                current_itr = low_latency;
4015
4016        switch (current_itr) {
4017        /* counts and packets in update_itr are dependent on these numbers */
4018        case lowest_latency:
4019                new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
4020                break;
4021        case low_latency:
4022                new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
4023                break;
4024        case bulk_latency:
4025                new_itr = IGC_4K_ITR;  /* 4,000 ints/sec */
4026                break;
4027        default:
4028                break;
4029        }
4030
4031set_itr_now:
4032        if (new_itr != q_vector->itr_val) {
4033                /* this attempts to bias the interrupt rate towards Bulk
4034                 * by adding intermediate steps when interrupt rate is
4035                 * increasing
4036                 */
4037                new_itr = new_itr > q_vector->itr_val ?
4038                          max((new_itr * q_vector->itr_val) /
4039                          (new_itr + (q_vector->itr_val >> 2)),
4040                          new_itr) : new_itr;
4041                /* Don't write the value here; it resets the adapter's
4042                 * internal timer, and causes us to delay far longer than
4043                 * we should between interrupts.  Instead, we write the ITR
4044                 * value at the beginning of the next interrupt so the timing
4045                 * ends up being correct.
4046                 */
4047                q_vector->itr_val = new_itr;
4048                q_vector->set_itr = 1;
4049        }
4050}
4051
4052static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
4053{
4054        int v_idx = adapter->num_q_vectors;
4055
4056        if (adapter->msix_entries) {
4057                pci_disable_msix(adapter->pdev);
4058                kfree(adapter->msix_entries);
4059                adapter->msix_entries = NULL;
4060        } else if (adapter->flags & IGC_FLAG_HAS_MSI) {
4061                pci_disable_msi(adapter->pdev);
4062        }
4063
4064        while (v_idx--)
4065                igc_reset_q_vector(adapter, v_idx);
4066}
4067
4068/**
4069 * igc_set_interrupt_capability - set MSI or MSI-X if supported
4070 * @adapter: Pointer to adapter structure
4071 * @msix: boolean value for MSI-X capability
4072 *
4073 * Attempt to configure interrupts using the best available
4074 * capabilities of the hardware and kernel.
4075 */
4076static void igc_set_interrupt_capability(struct igc_adapter *adapter,
4077                                         bool msix)
4078{
4079        int numvecs, i;
4080        int err;
4081
4082        if (!msix)
4083                goto msi_only;
4084        adapter->flags |= IGC_FLAG_HAS_MSIX;
4085
4086        /* Number of supported queues. */
4087        adapter->num_rx_queues = adapter->rss_queues;
4088
4089        adapter->num_tx_queues = adapter->rss_queues;
4090
4091        /* start with one vector for every Rx queue */
4092        numvecs = adapter->num_rx_queues;
4093
4094        /* if Tx handler is separate add 1 for every Tx queue */
4095        if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
4096                numvecs += adapter->num_tx_queues;
4097
4098        /* store the number of vectors reserved for queues */
4099        adapter->num_q_vectors = numvecs;
4100
4101        /* add 1 vector for link status interrupts */
4102        numvecs++;
4103
4104        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
4105                                        GFP_KERNEL);
4106
4107        if (!adapter->msix_entries)
4108                return;
4109
4110        /* populate entry values */
4111        for (i = 0; i < numvecs; i++)
4112                adapter->msix_entries[i].entry = i;
4113
4114        err = pci_enable_msix_range(adapter->pdev,
4115                                    adapter->msix_entries,
4116                                    numvecs,
4117                                    numvecs);
4118        if (err > 0)
4119                return;
4120
4121        kfree(adapter->msix_entries);
4122        adapter->msix_entries = NULL;
4123
4124        igc_reset_interrupt_capability(adapter);
4125
4126msi_only:
4127        adapter->flags &= ~IGC_FLAG_HAS_MSIX;
4128
4129        adapter->rss_queues = 1;
4130        adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
4131        adapter->num_rx_queues = 1;
4132        adapter->num_tx_queues = 1;
4133        adapter->num_q_vectors = 1;
4134        if (!pci_enable_msi(adapter->pdev))
4135                adapter->flags |= IGC_FLAG_HAS_MSI;
4136}
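/* Vector accounting example: with rss_queues = 4 and queue pairs enabled,
 * numvecs = 4 queue vectors + 1 for link/other causes = 5 MSI-X entries;
 * with pairing disabled the Tx queues get their own handlers, giving
 * 4 + 4 + 1 = 9.  If pci_enable_msix_range() cannot grant the full set,
 * the driver falls back to MSI (or legacy interrupts) with a single
 * combined queue pair.
 */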
4137
4138/**
4139 * igc_update_ring_itr - update the dynamic ITR value based on packet size
4140 * @q_vector: pointer to q_vector
4141 *
4142 * Stores a new ITR value based strictly on packet size.  This
4143 * algorithm is less sophisticated than that used in igc_update_itr,
4144 * due to the difficulty of synchronizing statistics across multiple
4145 * receive rings.  The divisors and thresholds used by this function
4146 * were determined based on theoretical maximum wire speed and testing
4147 * data, in order to minimize response time while increasing bulk
4148 * throughput.
4149 * NOTE: This function is called only when operating in a multiqueue
4150 * receive environment.
4151 */
4152static void igc_update_ring_itr(struct igc_q_vector *q_vector)
4153{
4154        struct igc_adapter *adapter = q_vector->adapter;
4155        int new_val = q_vector->itr_val;
4156        int avg_wire_size = 0;
4157        unsigned int packets;
4158
4159        /* For non-gigabit speeds, just fix the interrupt rate at 4000
4160         * ints/sec - ITR timer value of 120 ticks.
4161         */
4162        switch (adapter->link_speed) {
4163        case SPEED_10:
4164        case SPEED_100:
4165                new_val = IGC_4K_ITR;
4166                goto set_itr_val;
4167        default:
4168                break;
4169        }
4170
4171        packets = q_vector->rx.total_packets;
4172        if (packets)
4173                avg_wire_size = q_vector->rx.total_bytes / packets;
4174
4175        packets = q_vector->tx.total_packets;
4176        if (packets)
4177                avg_wire_size = max_t(u32, avg_wire_size,
4178                                      q_vector->tx.total_bytes / packets);
4179
4180        /* if avg_wire_size isn't set no work was done */
4181        if (!avg_wire_size)
4182                goto clear_counts;
4183
4184        /* Add 24 bytes to size to account for CRC, preamble, and gap */
4185        avg_wire_size += 24;
4186
4187        /* Don't starve jumbo frames */
4188        avg_wire_size = min(avg_wire_size, 3000);
4189
4190        /* Give a little boost to mid-size frames */
4191        if (avg_wire_size > 300 && avg_wire_size < 1200)
4192                new_val = avg_wire_size / 3;
4193        else
4194                new_val = avg_wire_size / 2;
4195
4196        /* conservative mode (itr 3) eliminates the lowest_latency setting */
4197        if (new_val < IGC_20K_ITR &&
4198            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4199            (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4200                new_val = IGC_20K_ITR;
4201
4202set_itr_val:
4203        if (new_val != q_vector->itr_val) {
4204                q_vector->itr_val = new_val;
4205                q_vector->set_itr = 1;
4206        }
4207clear_counts:
4208        q_vector->rx.total_bytes = 0;
4209        q_vector->rx.total_packets = 0;
4210        q_vector->tx.total_bytes = 0;
4211        q_vector->tx.total_packets = 0;
4212}
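/* Worked example for the sizing heuristic above: 100 Rx packets totalling
 * 60000 bytes give avg_wire_size = 600, plus 24 bytes of overhead = 624.
 * That falls in the 300..1200 mid-size band, so new_val = 624 / 3 = 208;
 * a stream of 64-byte frames (88 after overhead) would instead use
 * 88 / 2 = 44, i.e. a much higher interrupt rate.
 */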
4213
4214static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
4215{
4216        struct igc_adapter *adapter = q_vector->adapter;
4217        struct igc_hw *hw = &adapter->hw;
4218
4219        if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
4220            (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
4221                if (adapter->num_q_vectors == 1)
4222                        igc_set_itr(q_vector);
4223                else
4224                        igc_update_ring_itr(q_vector);
4225        }
4226
4227        if (!test_bit(__IGC_DOWN, &adapter->state)) {
4228                if (adapter->msix_entries)
4229                        wr32(IGC_EIMS, q_vector->eims_value);
4230                else
4231                        igc_irq_enable(adapter);
4232        }
4233}
4234
4235static void igc_add_ring(struct igc_ring *ring,
4236                         struct igc_ring_container *head)
4237{
4238        head->ring = ring;
4239        head->count++;
4240}
4241
4242/**
4243 * igc_cache_ring_register - Descriptor ring to register mapping
4244 * @adapter: board private structure to initialize
4245 *
4246 * Once we know the feature-set enabled for the device, we'll cache
4247 * the register offset the descriptor ring is assigned to.
4248 */
4249static void igc_cache_ring_register(struct igc_adapter *adapter)
4250{
4251        int i = 0, j = 0;
4252
4253        switch (adapter->hw.mac.type) {
4254        case igc_i225:
4255        default:
4256                for (; i < adapter->num_rx_queues; i++)
4257                        adapter->rx_ring[i]->reg_idx = i;
4258                for (; j < adapter->num_tx_queues; j++)
4259                        adapter->tx_ring[j]->reg_idx = j;
4260                break;
4261        }
4262}
4263
4264/**
4265 * igc_poll - NAPI Rx polling callback
4266 * @napi: napi polling structure
4267 * @budget: count of how many packets we should handle
4268 */
4269static int igc_poll(struct napi_struct *napi, int budget)
4270{
4271        struct igc_q_vector *q_vector = container_of(napi,
4272                                                     struct igc_q_vector,
4273                                                     napi);
4274        struct igc_ring *rx_ring = q_vector->rx.ring;
4275        bool clean_complete = true;
4276        int work_done = 0;
4277
4278        if (q_vector->tx.ring)
4279                clean_complete = igc_clean_tx_irq(q_vector, budget);
4280
4281        if (rx_ring) {
4282                int cleaned = rx_ring->xsk_pool ?
4283                              igc_clean_rx_irq_zc(q_vector, budget) :
4284                              igc_clean_rx_irq(q_vector, budget);
4285
4286                work_done += cleaned;
4287                if (cleaned >= budget)
4288                        clean_complete = false;
4289        }
4290
4291        /* If all work not completed, return budget and keep polling */
4292        if (!clean_complete)
4293                return budget;
4294
4295        /* Exit the polling mode, but don't re-enable interrupts if stack might
4296         * poll us due to busy-polling
4297         */
4298        if (likely(napi_complete_done(napi, work_done)))
4299                igc_ring_irq_enable(q_vector);
4300
4301        return min(work_done, budget - 1);
4302}
4303
4304/**
4305 * igc_alloc_q_vector - Allocate memory for a single interrupt vector
4306 * @adapter: board private structure to initialize
4307 * @v_count: q_vectors allocated on adapter, used for ring interleaving
4308 * @v_idx: index of vector in adapter struct
4309 * @txr_count: total number of Tx rings to allocate
4310 * @txr_idx: index of first Tx ring to allocate
4311 * @rxr_count: total number of Rx rings to allocate
4312 * @rxr_idx: index of first Rx ring to allocate
4313 *
4314 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
4315 */
4316static int igc_alloc_q_vector(struct igc_adapter *adapter,
4317                              unsigned int v_count, unsigned int v_idx,
4318                              unsigned int txr_count, unsigned int txr_idx,
4319                              unsigned int rxr_count, unsigned int rxr_idx)
4320{
4321        struct igc_q_vector *q_vector;
4322        struct igc_ring *ring;
4323        int ring_count;
4324
4325        /* igc only supports 1 Tx and/or 1 Rx queue per vector */
4326        if (txr_count > 1 || rxr_count > 1)
4327                return -ENOMEM;
4328
4329        ring_count = txr_count + rxr_count;
4330
4331        /* allocate q_vector and rings */
4332        q_vector = adapter->q_vector[v_idx];
4333        if (!q_vector)
4334                q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
4335                                   GFP_KERNEL);
4336        else
4337                memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
4338        if (!q_vector)
4339                return -ENOMEM;
4340
4341        /* initialize NAPI */
4342        netif_napi_add(adapter->netdev, &q_vector->napi,
4343                       igc_poll, 64);
4344
4345        /* tie q_vector and adapter together */
4346        adapter->q_vector[v_idx] = q_vector;
4347        q_vector->adapter = adapter;
4348
4349        /* initialize work limits */
4350        q_vector->tx.work_limit = adapter->tx_work_limit;
4351
4352        /* initialize ITR configuration */
4353        q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
4354        q_vector->itr_val = IGC_START_ITR;
4355
4356        /* initialize pointer to rings */
4357        ring = q_vector->ring;
4358
4359        /* initialize ITR */
4360        if (rxr_count) {
4361                /* rx or rx/tx vector */
4362                if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
4363                        q_vector->itr_val = adapter->rx_itr_setting;
4364        } else {
4365                /* tx only vector */
4366                if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
4367                        q_vector->itr_val = adapter->tx_itr_setting;
4368        }
4369
4370        if (txr_count) {
4371                /* assign generic ring traits */
4372                ring->dev = &adapter->pdev->dev;
4373                ring->netdev = adapter->netdev;
4374
4375                /* configure backlink on ring */
4376                ring->q_vector = q_vector;
4377
4378                /* update q_vector Tx values */
4379                igc_add_ring(ring, &q_vector->tx);
4380
4381                /* apply Tx specific ring traits */
4382                ring->count = adapter->tx_ring_count;
4383                ring->queue_index = txr_idx;
4384
4385                /* assign ring to adapter */
4386                adapter->tx_ring[txr_idx] = ring;
4387
4388                /* push pointer to next ring */
4389                ring++;
4390        }
4391
4392        if (rxr_count) {
4393                /* assign generic ring traits */
4394                ring->dev = &adapter->pdev->dev;
4395                ring->netdev = adapter->netdev;
4396
4397                /* configure backlink on ring */
4398                ring->q_vector = q_vector;
4399
4400                /* update q_vector Rx values */
4401                igc_add_ring(ring, &q_vector->rx);
4402
4403                /* apply Rx specific ring traits */
4404                ring->count = adapter->rx_ring_count;
4405                ring->queue_index = rxr_idx;
4406
4407                /* assign ring to adapter */
4408                adapter->rx_ring[rxr_idx] = ring;
4409        }
4410
4411        return 0;
4412}
4413
4414/**
4415 * igc_alloc_q_vectors - Allocate memory for interrupt vectors
4416 * @adapter: board private structure to initialize
4417 *
4418 * We allocate one q_vector per queue interrupt.  If allocation fails we
4419 * return -ENOMEM.
4420 */
4421static int igc_alloc_q_vectors(struct igc_adapter *adapter)
4422{
4423        int rxr_remaining = adapter->num_rx_queues;
4424        int txr_remaining = adapter->num_tx_queues;
4425        int rxr_idx = 0, txr_idx = 0, v_idx = 0;
4426        int q_vectors = adapter->num_q_vectors;
4427        int err;
4428
4429        if (q_vectors >= (rxr_remaining + txr_remaining)) {
4430                for (; rxr_remaining; v_idx++) {
4431                        err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4432                                                 0, 0, 1, rxr_idx);
4433
4434                        if (err)
4435                                goto err_out;
4436
4437                        /* update counts and index */
4438                        rxr_remaining--;
4439                        rxr_idx++;
4440                }
4441        }
4442
4443        for (; v_idx < q_vectors; v_idx++) {
4444                int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
4445                int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
4446
4447                err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4448                                         tqpv, txr_idx, rqpv, rxr_idx);
4449
4450                if (err)
4451                        goto err_out;
4452
4453                /* update counts and index */
4454                rxr_remaining -= rqpv;
4455                txr_remaining -= tqpv;
4456                rxr_idx++;
4457                txr_idx++;
4458        }
4459
4460        return 0;
4461
4462err_out:
4463        adapter->num_tx_queues = 0;
4464        adapter->num_rx_queues = 0;
4465        adapter->num_q_vectors = 0;
4466
4467        while (v_idx--)
4468                igc_free_q_vector(adapter, v_idx);
4469
4470        return -ENOMEM;
4471}
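/* Distribution example: with 4 Tx and 4 Rx queues and 4 q_vectors (queue
 * pairs enabled), q_vectors < rxr + txr, so the Rx-only loop is skipped
 * and every pass of the second loop gets rqpv = tqpv = 1, i.e. each
 * vector drives exactly one Tx/Rx ring pair.  With 8 q_vectors the first
 * loop would give each Rx ring its own vector before the Tx rings are
 * spread across the remainder.
 */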
4472
4473/**
4474 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
4475 * @adapter: Pointer to adapter structure
4476 * @msix: boolean for MSI-X capability
4477 *
4478 * This function initializes the interrupts and allocates all of the queues.
4479 */
4480static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
4481{
4482        struct net_device *dev = adapter->netdev;
4483        int err = 0;
4484
4485        igc_set_interrupt_capability(adapter, msix);
4486
4487        err = igc_alloc_q_vectors(adapter);
4488        if (err) {
4489                netdev_err(dev, "Unable to allocate memory for vectors\n");
4490                goto err_alloc_q_vectors;
4491        }
4492
4493        igc_cache_ring_register(adapter);
4494
4495        return 0;
4496
4497err_alloc_q_vectors:
4498        igc_reset_interrupt_capability(adapter);
4499        return err;
4500}
4501
4502/**
4503 * igc_sw_init - Initialize general software structures (struct igc_adapter)
4504 * @adapter: board private structure to initialize
4505 *
4506 * igc_sw_init initializes the Adapter private data structure.
4507 * Fields are initialized based on PCI device information and
4508 * OS network device settings (MTU size).
4509 */
4510static int igc_sw_init(struct igc_adapter *adapter)
4511{
4512        struct net_device *netdev = adapter->netdev;
4513        struct pci_dev *pdev = adapter->pdev;
4514        struct igc_hw *hw = &adapter->hw;
4515
4516        pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
4517
4518        /* set default ring sizes */
4519        adapter->tx_ring_count = IGC_DEFAULT_TXD;
4520        adapter->rx_ring_count = IGC_DEFAULT_RXD;
4521
4522        /* set default ITR values */
4523        adapter->rx_itr_setting = IGC_DEFAULT_ITR;
4524        adapter->tx_itr_setting = IGC_DEFAULT_ITR;
4525
4526        /* set default work limits */
4527        adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
4528
4529        /* adjust max frame to be at least the size of a standard frame */
4530        adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
4531                                VLAN_HLEN;
4532        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
4533
4534        mutex_init(&adapter->nfc_rule_lock);
4535        INIT_LIST_HEAD(&adapter->nfc_rule_list);
4536        adapter->nfc_rule_count = 0;
4537
4538        spin_lock_init(&adapter->stats64_lock);
4539        /* Assume MSI-X interrupts, will be checked during IRQ allocation */
4540        adapter->flags |= IGC_FLAG_HAS_MSIX;
4541
4542        igc_init_queue_configuration(adapter);
4543
4544        /* This call may decrease the number of queues */
4545        if (igc_init_interrupt_scheme(adapter, true)) {
4546                netdev_err(netdev, "Unable to allocate memory for queues\n");
4547                return -ENOMEM;
4548        }
4549
4550        /* Explicitly disable IRQ since the NIC can be in any state. */
4551        igc_irq_disable(adapter);
4552
4553        set_bit(__IGC_DOWN, &adapter->state);
4554
4555        return 0;
4556}
4557
4558/**
4559 * igc_up - Open the interface and prepare it to handle traffic
4560 * @adapter: board private structure
4561 */
4562void igc_up(struct igc_adapter *adapter)
4563{
4564        struct igc_hw *hw = &adapter->hw;
4565        int i = 0;
4566
4567        /* hardware has been reset, we need to reload some things */
4568        igc_configure(adapter);
4569
4570        clear_bit(__IGC_DOWN, &adapter->state);
4571
4572        for (i = 0; i < adapter->num_q_vectors; i++)
4573                napi_enable(&adapter->q_vector[i]->napi);
4574
4575        if (adapter->msix_entries)
4576                igc_configure_msix(adapter);
4577        else
4578                igc_assign_vector(adapter->q_vector[0], 0);
4579
4580        /* Clear any pending interrupts. */
4581        rd32(IGC_ICR);
4582        igc_irq_enable(adapter);
4583
4584        netif_tx_start_all_queues(adapter->netdev);
4585
4586        /* start the watchdog. */
4587        hw->mac.get_link_status = true;
4588        schedule_work(&adapter->watchdog_task);
4589}
4590
4591/**
4592 * igc_update_stats - Update the board statistics counters
4593 * @adapter: board private structure
4594 */
4595void igc_update_stats(struct igc_adapter *adapter)
4596{
4597        struct rtnl_link_stats64 *net_stats = &adapter->stats64;
4598        struct pci_dev *pdev = adapter->pdev;
4599        struct igc_hw *hw = &adapter->hw;
4600        u64 _bytes, _packets;
4601        u64 bytes, packets;
4602        unsigned int start;
4603        u32 mpc;
4604        int i;
4605
4606        /* Prevent stats update while adapter is being reset, or if the pci
4607         * connection is down.
4608         */
4609        if (adapter->link_speed == 0)
4610                return;
4611        if (pci_channel_offline(pdev))
4612                return;
4613
4614        packets = 0;
4615        bytes = 0;
4616
4617        rcu_read_lock();
4618        for (i = 0; i < adapter->num_rx_queues; i++) {
4619                struct igc_ring *ring = adapter->rx_ring[i];
4620                u32 rqdpc = rd32(IGC_RQDPC(i));
4621
4622                if (hw->mac.type >= igc_i225)
4623                        wr32(IGC_RQDPC(i), 0);
4624
4625                if (rqdpc) {
4626                        ring->rx_stats.drops += rqdpc;
4627                        net_stats->rx_fifo_errors += rqdpc;
4628                }
4629
4630                do {
4631                        start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
4632                        _bytes = ring->rx_stats.bytes;
4633                        _packets = ring->rx_stats.packets;
4634                } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
4635                bytes += _bytes;
4636                packets += _packets;
4637        }
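        /* The fetch_begin/fetch_retry pair above is the usual u64_stats
         * seqcount pattern: the 64-bit per-ring byte/packet counters are
         * re-read until a consistent snapshot is obtained, since they may be
         * updated concurrently from the NAPI poll path on another CPU.  The
         * Tx loop below uses the same pattern.
         */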
4638
4639        net_stats->rx_bytes = bytes;
4640        net_stats->rx_packets = packets;
4641
4642        packets = 0;
4643        bytes = 0;
4644        for (i = 0; i < adapter->num_tx_queues; i++) {
4645                struct igc_ring *ring = adapter->tx_ring[i];
4646
4647                do {
4648                        start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
4649                        _bytes = ring->tx_stats.bytes;
4650                        _packets = ring->tx_stats.packets;
4651                } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
4652                bytes += _bytes;
4653                packets += _packets;
4654        }
4655        net_stats->tx_bytes = bytes;
4656        net_stats->tx_packets = packets;
4657        rcu_read_unlock();
4658
4659        /* read stats registers */
4660        adapter->stats.crcerrs += rd32(IGC_CRCERRS);
4661        adapter->stats.gprc += rd32(IGC_GPRC);
4662        adapter->stats.gorc += rd32(IGC_GORCL);
4663        rd32(IGC_GORCH); /* clear GORCL */
4664        adapter->stats.bprc += rd32(IGC_BPRC);
4665        adapter->stats.mprc += rd32(IGC_MPRC);
4666        adapter->stats.roc += rd32(IGC_ROC);
4667
4668        adapter->stats.prc64 += rd32(IGC_PRC64);
4669        adapter->stats.prc127 += rd32(IGC_PRC127);
4670        adapter->stats.prc255 += rd32(IGC_PRC255);
4671        adapter->stats.prc511 += rd32(IGC_PRC511);
4672        adapter->stats.prc1023 += rd32(IGC_PRC1023);
4673        adapter->stats.prc1522 += rd32(IGC_PRC1522);
4674        adapter->stats.tlpic += rd32(IGC_TLPIC);
4675        adapter->stats.rlpic += rd32(IGC_RLPIC);
4676        adapter->stats.hgptc += rd32(IGC_HGPTC);
4677
4678        mpc = rd32(IGC_MPC);
4679        adapter->stats.mpc += mpc;
4680        net_stats->rx_fifo_errors += mpc;
4681        adapter->stats.scc += rd32(IGC_SCC);
4682        adapter->stats.ecol += rd32(IGC_ECOL);
4683        adapter->stats.mcc += rd32(IGC_MCC);
4684        adapter->stats.latecol += rd32(IGC_LATECOL);
4685        adapter->stats.dc += rd32(IGC_DC);
4686        adapter->stats.rlec += rd32(IGC_RLEC);
4687        adapter->stats.xonrxc += rd32(IGC_XONRXC);
4688        adapter->stats.xontxc += rd32(IGC_XONTXC);
4689        adapter->stats.xoffrxc += rd32(IGC_XOFFRXC);
4690        adapter->stats.xofftxc += rd32(IGC_XOFFTXC);
4691        adapter->stats.fcruc += rd32(IGC_FCRUC);
4692        adapter->stats.gptc += rd32(IGC_GPTC);
4693        adapter->stats.gotc += rd32(IGC_GOTCL);
4694        rd32(IGC_GOTCH); /* clear GOTCL */
4695        adapter->stats.rnbc += rd32(IGC_RNBC);
4696        adapter->stats.ruc += rd32(IGC_RUC);
4697        adapter->stats.rfc += rd32(IGC_RFC);
4698        adapter->stats.rjc += rd32(IGC_RJC);
4699        adapter->stats.tor += rd32(IGC_TORH);
4700        adapter->stats.tot += rd32(IGC_TOTH);
4701        adapter->stats.tpr += rd32(IGC_TPR);
4702
4703        adapter->stats.ptc64 += rd32(IGC_PTC64);
4704        adapter->stats.ptc127 += rd32(IGC_PTC127);
4705        adapter->stats.ptc255 += rd32(IGC_PTC255);
4706        adapter->stats.ptc511 += rd32(IGC_PTC511);
4707        adapter->stats.ptc1023 += rd32(IGC_PTC1023);
4708        adapter->stats.ptc1522 += rd32(IGC_PTC1522);
4709
4710        adapter->stats.mptc += rd32(IGC_MPTC);
4711        adapter->stats.bptc += rd32(IGC_BPTC);
4712
4713        adapter->stats.tpt += rd32(IGC_TPT);
4714        adapter->stats.colc += rd32(IGC_COLC);
4715        adapter->stats.colc += rd32(IGC_RERC);
4716
4717        adapter->stats.algnerrc += rd32(IGC_ALGNERRC);
4718
4719        adapter->stats.tsctc += rd32(IGC_TSCTC);
4720
4721        adapter->stats.iac += rd32(IGC_IAC);
4722
4723        /* Fill out the OS statistics structure */
4724        net_stats->multicast = adapter->stats.mprc;
4725        net_stats->collisions = adapter->stats.colc;
4726
4727        /* Rx Errors */
4728
4729        /* RLEC on some newer hardware can be incorrect so build
4730         * our own version based on RUC and ROC
4731         */
4732        net_stats->rx_errors = adapter->stats.rxerrc +
4733                adapter->stats.crcerrs + adapter->stats.algnerrc +
4734                adapter->stats.ruc + adapter->stats.roc +
4735                adapter->stats.cexterr;
4736        net_stats->rx_length_errors = adapter->stats.ruc +
4737                                      adapter->stats.roc;
4738        net_stats->rx_crc_errors = adapter->stats.crcerrs;
4739        net_stats->rx_frame_errors = adapter->stats.algnerrc;
4740        net_stats->rx_missed_errors = adapter->stats.mpc;
4741
4742        /* Tx Errors */
4743        net_stats->tx_errors = adapter->stats.ecol +
4744                               adapter->stats.latecol;
4745        net_stats->tx_aborted_errors = adapter->stats.ecol;
4746        net_stats->tx_window_errors = adapter->stats.latecol;
4747        net_stats->tx_carrier_errors = adapter->stats.tncrs;
4748
4749        /* Tx Dropped needs to be maintained elsewhere */
4750
4751        /* Management Stats */
4752        adapter->stats.mgptc += rd32(IGC_MGTPTC);
4753        adapter->stats.mgprc += rd32(IGC_MGTPRC);
4754        adapter->stats.mgpdc += rd32(IGC_MGTPDC);
4755}
4756
4757/**
4758 * igc_down - Close the interface
4759 * @adapter: board private structure
4760 */
4761void igc_down(struct igc_adapter *adapter)
4762{
4763        struct net_device *netdev = adapter->netdev;
4764        struct igc_hw *hw = &adapter->hw;
4765        u32 tctl, rctl;
4766        int i = 0;
4767
4768        set_bit(__IGC_DOWN, &adapter->state);
4769
4770        igc_ptp_suspend(adapter);
4771
4772        if (pci_device_is_present(adapter->pdev)) {
4773                /* disable receives in the hardware */
4774                rctl = rd32(IGC_RCTL);
4775                wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
4776                /* flush and sleep below */
4777        }
4778        /* set trans_start so we don't get spurious watchdogs during reset */
4779        netif_trans_update(netdev);
4780
4781        netif_carrier_off(netdev);
4782        netif_tx_stop_all_queues(netdev);
4783
4784        if (pci_device_is_present(adapter->pdev)) {
4785                /* disable transmits in the hardware */
4786                tctl = rd32(IGC_TCTL);
4787                tctl &= ~IGC_TCTL_EN;
4788                wr32(IGC_TCTL, tctl);
4789                /* flush both disables and wait for them to finish */
4790                wrfl();
4791                usleep_range(10000, 20000);
4792
4793                igc_irq_disable(adapter);
4794        }
4795
4796        adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
4797
4798        for (i = 0; i < adapter->num_q_vectors; i++) {
4799                if (adapter->q_vector[i]) {
4800                        napi_synchronize(&adapter->q_vector[i]->napi);
4801                        napi_disable(&adapter->q_vector[i]->napi);
4802                }
4803        }
4804
4805        del_timer_sync(&adapter->watchdog_timer);
4806        del_timer_sync(&adapter->phy_info_timer);
4807
4808        /* record the stats before reset */
4809        spin_lock(&adapter->stats64_lock);
4810        igc_update_stats(adapter);
4811        spin_unlock(&adapter->stats64_lock);
4812
4813        adapter->link_speed = 0;
4814        adapter->link_duplex = 0;
4815
4816        if (!pci_channel_offline(adapter->pdev))
4817                igc_reset(adapter);
4818
4819        /* clear VLAN promisc flag so VFTA will be updated if necessary */
4820        adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
4821
4822        igc_clean_all_tx_rings(adapter);
4823        igc_clean_all_rx_rings(adapter);
4824}
4825
4826void igc_reinit_locked(struct igc_adapter *adapter)
4827{
4828        while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
4829                usleep_range(1000, 2000);
4830        igc_down(adapter);
4831        igc_up(adapter);
4832        clear_bit(__IGC_RESETTING, &adapter->state);
4833}
4834
4835static void igc_reset_task(struct work_struct *work)
4836{
4837        struct igc_adapter *adapter;
4838
4839        adapter = container_of(work, struct igc_adapter, reset_task);
4840
4841        rtnl_lock();
4842        /* If we're already down or resetting, just bail */
4843        if (test_bit(__IGC_DOWN, &adapter->state) ||
4844            test_bit(__IGC_RESETTING, &adapter->state)) {
4845                rtnl_unlock();
4846                return;
4847        }
4848
4849        igc_rings_dump(adapter);
4850        igc_regs_dump(adapter);
4851        netdev_err(adapter->netdev, "Reset adapter\n");
4852        igc_reinit_locked(adapter);
4853        rtnl_unlock();
4854}
4855
4856/**
4857 * igc_change_mtu - Change the Maximum Transfer Unit
4858 * @netdev: network interface device structure
4859 * @new_mtu: new value for the Maximum Transfer Unit
4860 *
4861 * Returns 0 on success, negative on failure
4862 */
4863static int igc_change_mtu(struct net_device *netdev, int new_mtu)
4864{
4865        int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4866        struct igc_adapter *adapter = netdev_priv(netdev);
4867
4868        if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
4869                netdev_dbg(netdev, "Jumbo frames not supported with XDP");
4870                return -EINVAL;
4871        }
4872
4873        /* adjust max frame to be at least the size of a standard frame */
4874        if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
4875                max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
4876
4877        while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
4878                usleep_range(1000, 2000);
4879
4880        /* igc_down has a dependency on max_frame_size */
4881        adapter->max_frame_size = max_frame;
4882
4883        if (netif_running(netdev))
4884                igc_down(adapter);
4885
4886        netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
4887        netdev->mtu = new_mtu;
4888
4889        if (netif_running(netdev))
4890                igc_up(adapter);
4891        else
4892                igc_reset(adapter);
4893
4894        clear_bit(__IGC_RESETTING, &adapter->state);
4895
4896        return 0;
4897}
4898
4899/**
4900 * igc_get_stats64 - Get System Network Statistics
4901 * @netdev: network interface device structure
4902 * @stats: rtnl_link_stats64 pointer
4903 *
4904 * Returns the address of the device statistics structure.
4905 * The statistics are updated here and also from the timer callback.
4906 */
4907static void igc_get_stats64(struct net_device *netdev,
4908                            struct rtnl_link_stats64 *stats)
4909{
4910        struct igc_adapter *adapter = netdev_priv(netdev);
4911
4912        spin_lock(&adapter->stats64_lock);
4913        if (!test_bit(__IGC_RESETTING, &adapter->state))
4914                igc_update_stats(adapter);
4915        memcpy(stats, &adapter->stats64, sizeof(*stats));
4916        spin_unlock(&adapter->stats64_lock);
4917}
4918
4919static netdev_features_t igc_fix_features(struct net_device *netdev,
4920                                          netdev_features_t features)
4921{
4922        /* Since there is no support for separate Rx/Tx vlan accel
4923         * enable/disable make sure Tx flag is always in same state as Rx.
4924         */
4925        if (features & NETIF_F_HW_VLAN_CTAG_RX)
4926                features |= NETIF_F_HW_VLAN_CTAG_TX;
4927        else
4928                features &= ~NETIF_F_HW_VLAN_CTAG_TX;
4929
4930        return features;
4931}
4932
4933static int igc_set_features(struct net_device *netdev,
4934                            netdev_features_t features)
4935{
4936        netdev_features_t changed = netdev->features ^ features;
4937        struct igc_adapter *adapter = netdev_priv(netdev);
4938
4939        if (changed & NETIF_F_HW_VLAN_CTAG_RX)
4940                igc_vlan_mode(netdev, features);
4941
4942        /* Only changes to RXALL or NTUPLE require further handling below */
4943        if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
4944                return 0;
4945
4946        if (!(features & NETIF_F_NTUPLE))
4947                igc_flush_nfc_rules(adapter);
4948
4949        netdev->features = features;
4950
4951        if (netif_running(netdev))
4952                igc_reinit_locked(adapter);
4953        else
4954                igc_reset(adapter);
4955
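        /* A positive return value tells the networking core that the driver
         * has already updated netdev->features itself (done above), so the
         * core should not overwrite them.
         */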
4956        return 1;
4957}
4958
4959static netdev_features_t
4960igc_features_check(struct sk_buff *skb, struct net_device *dev,
4961                   netdev_features_t features)
4962{
4963        unsigned int network_hdr_len, mac_hdr_len;
4964
4965        /* Make certain the headers can be described by a context descriptor */
4966        mac_hdr_len = skb_network_header(skb) - skb->data;
4967        if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN))
4968                return features & ~(NETIF_F_HW_CSUM |
4969                                    NETIF_F_SCTP_CRC |
4970                                    NETIF_F_HW_VLAN_CTAG_TX |
4971                                    NETIF_F_TSO |
4972                                    NETIF_F_TSO6);
4973
4974        network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
4975        if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN))
4976                return features & ~(NETIF_F_HW_CSUM |
4977                                    NETIF_F_SCTP_CRC |
4978                                    NETIF_F_TSO |
4979                                    NETIF_F_TSO6);
4980
4981        /* We can only support IPv4 TSO in tunnels if we can mangle the
4982         * inner IP ID field, so strip TSO if MANGLEID is not supported.
4983         */
4984        if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
4985                features &= ~NETIF_F_TSO;
4986
4987        return features;
4988}
4989
4990static void igc_tsync_interrupt(struct igc_adapter *adapter)
4991{
4992        u32 ack, tsauxc, sec, nsec, tsicr;
4993        struct igc_hw *hw = &adapter->hw;
4994        struct ptp_clock_event event;
4995        struct timespec64 ts;
4996
4997        tsicr = rd32(IGC_TSICR);
4998        ack = 0;
4999
5000        if (tsicr & IGC_TSICR_SYS_WRAP) {
5001                event.type = PTP_CLOCK_PPS;
5002                if (adapter->ptp_caps.pps)
5003                        ptp_clock_event(adapter->ptp_clock, &event);
5004                ack |= IGC_TSICR_SYS_WRAP;
5005        }
5006
5007        if (tsicr & IGC_TSICR_TXTS) {
5008                /* retrieve hardware timestamp */
5009                schedule_work(&adapter->ptp_tx_work);
5010                ack |= IGC_TSICR_TXTS;
5011        }
5012
5013        if (tsicr & IGC_TSICR_TT0) {
5014                spin_lock(&adapter->tmreg_lock);
5015                ts = timespec64_add(adapter->perout[0].start,
5016                                    adapter->perout[0].period);
5017                wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
5018                wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec);
5019                tsauxc = rd32(IGC_TSAUXC);
5020                tsauxc |= IGC_TSAUXC_EN_TT0;
5021                wr32(IGC_TSAUXC, tsauxc);
5022                adapter->perout[0].start = ts;
5023                spin_unlock(&adapter->tmreg_lock);
5024                ack |= IGC_TSICR_TT0;
5025        }
5026
5027        if (tsicr & IGC_TSICR_TT1) {
5028                spin_lock(&adapter->tmreg_lock);
5029                ts = timespec64_add(adapter->perout[1].start,
5030                                    adapter->perout[1].period);
5031                wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
5032                wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec);
5033                tsauxc = rd32(IGC_TSAUXC);
5034                tsauxc |= IGC_TSAUXC_EN_TT1;
5035                wr32(IGC_TSAUXC, tsauxc);
5036                adapter->perout[1].start = ts;
5037                spin_unlock(&adapter->tmreg_lock);
5038                ack |= IGC_TSICR_TT1;
5039        }
5040
5041        if (tsicr & IGC_TSICR_AUTT0) {
5042                nsec = rd32(IGC_AUXSTMPL0);
5043                sec  = rd32(IGC_AUXSTMPH0);
5044                event.type = PTP_CLOCK_EXTTS;
5045                event.index = 0;
5046                event.timestamp = sec * NSEC_PER_SEC + nsec;
5047                ptp_clock_event(adapter->ptp_clock, &event);
5048                ack |= IGC_TSICR_AUTT0;
5049        }
5050
5051        if (tsicr & IGC_TSICR_AUTT1) {
5052                nsec = rd32(IGC_AUXSTMPL1);
5053                sec  = rd32(IGC_AUXSTMPH1);
5054                event.type = PTP_CLOCK_EXTTS;
5055                event.index = 1;
5056                event.timestamp = sec * NSEC_PER_SEC + nsec;
5057                ptp_clock_event(adapter->ptp_clock, &event);
5058                ack |= IGC_TSICR_AUTT1;
5059        }
5060
5061        /* acknowledge the interrupts */
5062        wr32(IGC_TSICR, ack);
5063}
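
/* Note on the TT0/TT1 branches above: each periodic-output interrupt re-arms
 * the next edge by adding the configured period to the previous start time,
 * writing the new target into the TRGTTIM registers and re-enabling the
 * corresponding EN_TTx bit in TSAUXC.
 */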
5064
5065/**
5066 * igc_msix_other - msix other interrupt handler
5067 * @irq: interrupt number
5068 * @data: pointer to our adapter private structure
5069 */
5070static irqreturn_t igc_msix_other(int irq, void *data)
5071{
5072        struct igc_adapter *adapter = data;
5073        struct igc_hw *hw = &adapter->hw;
5074        u32 icr = rd32(IGC_ICR);
5075
5076        /* reading ICR causes bit 31 of EICR to be cleared */
5077        if (icr & IGC_ICR_DRSTA)
5078                schedule_work(&adapter->reset_task);
5079
5080        if (icr & IGC_ICR_DOUTSYNC) {
5081                /* HW is reporting DMA is out of sync */
5082                adapter->stats.doosync++;
5083        }
5084
5085        if (icr & IGC_ICR_LSC) {
5086                hw->mac.get_link_status = true;
5087                /* guard against interrupt when we're going down */
5088                if (!test_bit(__IGC_DOWN, &adapter->state))
5089                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5090        }
5091
5092        if (icr & IGC_ICR_TS)
5093                igc_tsync_interrupt(adapter);
5094
5095        wr32(IGC_EIMS, adapter->eims_other);
5096
5097        return IRQ_HANDLED;
5098}
5099
5100static void igc_write_itr(struct igc_q_vector *q_vector)
5101{
5102        u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
5103
5104        if (!q_vector->set_itr)
5105                return;
5106
5107        if (!itr_val)
5108                itr_val = IGC_ITR_VAL_MASK;
5109
5110        itr_val |= IGC_EITR_CNT_IGNR;
5111
5112        writel(itr_val, q_vector->itr_register);
5113        q_vector->set_itr = 0;
5114}
5115
5116static irqreturn_t igc_msix_ring(int irq, void *data)
5117{
5118        struct igc_q_vector *q_vector = data;
5119
5120        /* Write the ITR value calculated from the previous interrupt. */
5121        igc_write_itr(q_vector);
5122
5123        napi_schedule(&q_vector->napi);
5124
5125        return IRQ_HANDLED;
5126}
5127
5128/**
5129 * igc_request_msix - Initialize MSI-X interrupts
5130 * @adapter: Pointer to adapter structure
5131 *
5132 * igc_request_msix allocates MSI-X vectors and requests interrupts from the
5133 * kernel.
5134 */
5135static int igc_request_msix(struct igc_adapter *adapter)
5136{
5137        unsigned int num_q_vectors = adapter->num_q_vectors;
5138        int i = 0, err = 0, vector = 0, free_vector = 0;
5139        struct net_device *netdev = adapter->netdev;
5140
5141        err = request_irq(adapter->msix_entries[vector].vector,
5142                          &igc_msix_other, 0, netdev->name, adapter);
5143        if (err)
5144                goto err_out;
5145
5146        if (num_q_vectors > MAX_Q_VECTORS) {
5147                num_q_vectors = MAX_Q_VECTORS;
5148                dev_warn(&adapter->pdev->dev,
5149                         "The number of queue vectors (%d) is higher than max allowed (%d)\n",
5150                         adapter->num_q_vectors, MAX_Q_VECTORS);
5151        }
5152        for (i = 0; i < num_q_vectors; i++) {
5153                struct igc_q_vector *q_vector = adapter->q_vector[i];
5154
5155                vector++;
5156
5157                q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
5158
5159                if (q_vector->rx.ring && q_vector->tx.ring)
5160                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
5161                                q_vector->rx.ring->queue_index);
5162                else if (q_vector->tx.ring)
5163                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
5164                                q_vector->tx.ring->queue_index);
5165                else if (q_vector->rx.ring)
5166                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
5167                                q_vector->rx.ring->queue_index);
5168                else
5169                        sprintf(q_vector->name, "%s-unused", netdev->name);
5170
5171                err = request_irq(adapter->msix_entries[vector].vector,
5172                                  igc_msix_ring, 0, q_vector->name,
5173                                  q_vector);
5174                if (err)
5175                        goto err_free;
5176        }
5177
5178        igc_configure_msix(adapter);
5179        return 0;
5180
5181err_free:
5182        /* free already assigned IRQs */
5183        free_irq(adapter->msix_entries[free_vector++].vector, adapter);
5184
5185        vector--;
5186        for (i = 0; i < vector; i++) {
5187                free_irq(adapter->msix_entries[free_vector++].vector,
5188                         adapter->q_vector[i]);
5189        }
5190err_out:
5191        return err;
5192}
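
/* MSI-X vector layout requested above: entry 0 is the "other" vector (link
 * state changes, resets and timesync events, serviced by igc_msix_other),
 * and each following entry maps one queue vector to igc_msix_ring.  On
 * failure, the vectors already requested are released in the same order.
 */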
5193
5194/**
5195 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
5196 * @adapter: Pointer to adapter structure
5197 *
5198 * This function resets the device so that it has no Rx queues, Tx queues, or
5199 * MSI-X interrupts allocated.
5200 */
5201static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
5202{
5203        igc_free_q_vectors(adapter);
5204        igc_reset_interrupt_capability(adapter);
5205}
5206
5207/* Need to wait a few seconds after link up to get diagnostic information from
5208 * the phy
5209 */
5210static void igc_update_phy_info(struct timer_list *t)
5211{
5212        struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
5213
5214        igc_get_phy_info(&adapter->hw);
5215}
5216
5217/**
5218 * igc_has_link - check shared code for link and determine up/down
5219 * @adapter: pointer to driver private info
5220 */
5221bool igc_has_link(struct igc_adapter *adapter)
5222{
5223        struct igc_hw *hw = &adapter->hw;
5224        bool link_active = false;
5225
5226        /* get_link_status is set on LSC (link status) interrupt or
5227         * rx sequence error interrupt.  get_link_status will stay
5228         * false until the igc_check_for_link establishes link
5229         * for copper adapters ONLY
5230         */
5231        if (!hw->mac.get_link_status)
5232                return true;
5233        hw->mac.ops.check_for_link(hw);
5234        link_active = !hw->mac.get_link_status;
5235
5236        if (hw->mac.type == igc_i225) {
5237                if (!netif_carrier_ok(adapter->netdev)) {
5238                        adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5239                } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
5240                        adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
5241                        adapter->link_check_timeout = jiffies;
5242                }
5243        }
5244
5245        return link_active;
5246}
5247
5248/**
5249 * igc_watchdog - Timer Call-back
5250 * @t: timer for the watchdog
5251 */
5252static void igc_watchdog(struct timer_list *t)
5253{
5254        struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
5255        /* Do the rest outside of interrupt context */
5256        schedule_work(&adapter->watchdog_task);
5257}
5258
5259static void igc_watchdog_task(struct work_struct *work)
5260{
5261        struct igc_adapter *adapter = container_of(work,
5262                                                   struct igc_adapter,
5263                                                   watchdog_task);
5264        struct net_device *netdev = adapter->netdev;
5265        struct igc_hw *hw = &adapter->hw;
5266        struct igc_phy_info *phy = &hw->phy;
5267        u16 phy_data, retry_count = 20;
5268        u32 link;
5269        int i;
5270
5271        link = igc_has_link(adapter);
5272
5273        if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
5274                if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
5275                        adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5276                else
5277                        link = false;
5278        }
5279
5280        if (link) {
5281                /* Cancel scheduled suspend requests. */
5282                pm_runtime_resume(netdev->dev.parent);
5283
5284                if (!netif_carrier_ok(netdev)) {
5285                        u32 ctrl;
5286
5287                        hw->mac.ops.get_speed_and_duplex(hw,
5288                                                         &adapter->link_speed,
5289                                                         &adapter->link_duplex);
5290
5291                        ctrl = rd32(IGC_CTRL);
5292                        /* Link status message must follow this format */
5293                        netdev_info(netdev,
5294                                    "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
5295                                    adapter->link_speed,
5296                                    adapter->link_duplex == FULL_DUPLEX ?
5297                                    "Full" : "Half",
5298                                    (ctrl & IGC_CTRL_TFCE) &&
5299                                    (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
5300                                    (ctrl & IGC_CTRL_RFCE) ?  "RX" :
5301                                    (ctrl & IGC_CTRL_TFCE) ?  "TX" : "None");
5302
5303                        /* disable EEE if enabled */
5304                        if ((adapter->flags & IGC_FLAG_EEE) &&
5305                            adapter->link_duplex == HALF_DUPLEX) {
5306                                netdev_info(netdev,
5307                                            "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n");
5308                                adapter->hw.dev_spec._base.eee_enable = false;
5309                                adapter->flags &= ~IGC_FLAG_EEE;
5310                        }
5311
5312                        /* check if SmartSpeed worked */
5313                        igc_check_downshift(hw);
5314                        if (phy->speed_downgraded)
5315                                netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
5316
5317                        /* adjust timeout factor according to speed/duplex */
5318                        adapter->tx_timeout_factor = 1;
5319                        switch (adapter->link_speed) {
5320                        case SPEED_10:
5321                                adapter->tx_timeout_factor = 14;
5322                                break;
5323                        case SPEED_100:
5324                        case SPEED_1000:
5325                        case SPEED_2500:
5326                                adapter->tx_timeout_factor = 7;
5327                                break;
5328                        }
5329
5330                        if (adapter->link_speed != SPEED_1000)
5331                                goto no_wait;
5332
5333                        /* wait for Remote receiver status OK */
5334retry_read_status:
5335                        if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
5336                                              &phy_data)) {
5337                                if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
5338                                    retry_count) {
5339                                        msleep(100);
5340                                        retry_count--;
5341                                        goto retry_read_status;
5342                                } else if (!retry_count) {
5343                                        netdev_err(netdev, "exceeded max 2 second wait\n");
5344                                }
5345                        } else {
5346                                netdev_err(netdev, "failed to read 1000Base-T Status register\n");
5347                        }
5348no_wait:
5349                        netif_carrier_on(netdev);
5350
5351                        /* link state has changed, schedule phy info update */
5352                        if (!test_bit(__IGC_DOWN, &adapter->state))
5353                                mod_timer(&adapter->phy_info_timer,
5354                                          round_jiffies(jiffies + 2 * HZ));
5355                }
5356        } else {
5357                if (netif_carrier_ok(netdev)) {
5358                        adapter->link_speed = 0;
5359                        adapter->link_duplex = 0;
5360
5361                        /* Link status message must follow this format */
5362                        netdev_info(netdev, "NIC Link is Down\n");
5363                        netif_carrier_off(netdev);
5364
5365                        /* link state has changed, schedule phy info update */
5366                        if (!test_bit(__IGC_DOWN, &adapter->state))
5367                                mod_timer(&adapter->phy_info_timer,
5368                                          round_jiffies(jiffies + 2 * HZ));
5369
5370                        /* link is down, time to check for alternate media */
5371                        if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
5372                                if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
5373                                        schedule_work(&adapter->reset_task);
5374                                        /* return immediately */
5375                                        return;
5376                                }
5377                        }
5378                        pm_schedule_suspend(netdev->dev.parent,
5379                                            MSEC_PER_SEC * 5);
5380
5381                /* also check for alternate media here */
5382                } else if (!netif_carrier_ok(netdev) &&
5383                           (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
5384                        if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
5385                                schedule_work(&adapter->reset_task);
5386                                /* return immediately */
5387                                return;
5388                        }
5389                }
5390        }
5391
5392        spin_lock(&adapter->stats64_lock);
5393        igc_update_stats(adapter);
5394        spin_unlock(&adapter->stats64_lock);
5395
5396        for (i = 0; i < adapter->num_tx_queues; i++) {
5397                struct igc_ring *tx_ring = adapter->tx_ring[i];
5398
5399                if (!netif_carrier_ok(netdev)) {
5400                        /* We've lost link, so the controller stops DMA,
5401                         * but we've got queued Tx work that's never going
5402                         * to get done, so reset controller to flush Tx.
5403                         * (Do the reset outside of interrupt context).
5404                         */
5405                        if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
5406                                adapter->tx_timeout_count++;
5407                                schedule_work(&adapter->reset_task);
5408                                /* return immediately since reset is imminent */
5409                                return;
5410                        }
5411                }
5412
5413                /* Force detection of hung controller every watchdog period */
5414                set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5415        }
5416
5417        /* Cause software interrupt to ensure Rx ring is cleaned */
5418        if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5419                u32 eics = 0;
5420
5421                for (i = 0; i < adapter->num_q_vectors; i++)
5422                        eics |= adapter->q_vector[i]->eims_value;
5423                wr32(IGC_EICS, eics);
5424        } else {
5425                wr32(IGC_ICS, IGC_ICS_RXDMT0);
5426        }
5427
5428        igc_ptp_tx_hang(adapter);
5429
5430        /* Reset the timer */
5431        if (!test_bit(__IGC_DOWN, &adapter->state)) {
5432                if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
5433                        mod_timer(&adapter->watchdog_timer,
5434                                  round_jiffies(jiffies + HZ));
5435                else
5436                        mod_timer(&adapter->watchdog_timer,
5437                                  round_jiffies(jiffies + 2 * HZ));
5438        }
5439}
5440
5441/**
5442 * igc_intr_msi - Interrupt Handler
5443 * @irq: interrupt number
5444 * @data: pointer to a network interface device structure
5445 */
5446static irqreturn_t igc_intr_msi(int irq, void *data)
5447{
5448        struct igc_adapter *adapter = data;
5449        struct igc_q_vector *q_vector = adapter->q_vector[0];
5450        struct igc_hw *hw = &adapter->hw;
5451        /* read ICR disables interrupts using IAM */
5452        u32 icr = rd32(IGC_ICR);
5453
5454        igc_write_itr(q_vector);
5455
5456        if (icr & IGC_ICR_DRSTA)
5457                schedule_work(&adapter->reset_task);
5458
5459        if (icr & IGC_ICR_DOUTSYNC) {
5460                /* HW is reporting DMA is out of sync */
5461                adapter->stats.doosync++;
5462        }
5463
5464        if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5465                hw->mac.get_link_status = true;
5466                if (!test_bit(__IGC_DOWN, &adapter->state))
5467                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5468        }
5469
5470        if (icr & IGC_ICR_TS)
5471                igc_tsync_interrupt(adapter);
5472
5473        napi_schedule(&q_vector->napi);
5474
5475        return IRQ_HANDLED;
5476}
5477
5478/**
5479 * igc_intr - Legacy Interrupt Handler
5480 * @irq: interrupt number
5481 * @data: pointer to a network interface device structure
5482 */
5483static irqreturn_t igc_intr(int irq, void *data)
5484{
5485        struct igc_adapter *adapter = data;
5486        struct igc_q_vector *q_vector = adapter->q_vector[0];
5487        struct igc_hw *hw = &adapter->hw;
5488        /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5489         * need for the IMC write
5490         */
5491        u32 icr = rd32(IGC_ICR);
5492
5493        /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5494         * not set, then the adapter didn't send an interrupt
5495         */
5496        if (!(icr & IGC_ICR_INT_ASSERTED))
5497                return IRQ_NONE;
5498
5499        igc_write_itr(q_vector);
5500
5501        if (icr & IGC_ICR_DRSTA)
5502                schedule_work(&adapter->reset_task);
5503
5504        if (icr & IGC_ICR_DOUTSYNC) {
5505                /* HW is reporting DMA is out of sync */
5506                adapter->stats.doosync++;
5507        }
5508
5509        if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5510                hw->mac.get_link_status = true;
5511                /* guard against interrupt when we're going down */
5512                if (!test_bit(__IGC_DOWN, &adapter->state))
5513                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5514        }
5515
5516        if (icr & IGC_ICR_TS)
5517                igc_tsync_interrupt(adapter);
5518
5519        napi_schedule(&q_vector->napi);
5520
5521        return IRQ_HANDLED;
5522}
5523
5524static void igc_free_irq(struct igc_adapter *adapter)
5525{
5526        if (adapter->msix_entries) {
5527                int vector = 0, i;
5528
5529                free_irq(adapter->msix_entries[vector++].vector, adapter);
5530
5531                for (i = 0; i < adapter->num_q_vectors; i++)
5532                        free_irq(adapter->msix_entries[vector++].vector,
5533                                 adapter->q_vector[i]);
5534        } else {
5535                free_irq(adapter->pdev->irq, adapter);
5536        }
5537}
5538
5539/**
5540 * igc_request_irq - initialize interrupts
5541 * @adapter: Pointer to adapter structure
5542 *
5543 * Attempts to configure interrupts using the best available
5544 * capabilities of the hardware and kernel.
5545 */
5546static int igc_request_irq(struct igc_adapter *adapter)
5547{
5548        struct net_device *netdev = adapter->netdev;
5549        struct pci_dev *pdev = adapter->pdev;
5550        int err = 0;
5551
5552        if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5553                err = igc_request_msix(adapter);
5554                if (!err)
5555                        goto request_done;
5556                /* fall back to MSI */
5557                igc_free_all_tx_resources(adapter);
5558                igc_free_all_rx_resources(adapter);
5559
5560                igc_clear_interrupt_scheme(adapter);
5561                err = igc_init_interrupt_scheme(adapter, false);
5562                if (err)
5563                        goto request_done;
5564                igc_setup_all_tx_resources(adapter);
5565                igc_setup_all_rx_resources(adapter);
5566                igc_configure(adapter);
5567        }
5568
5569        igc_assign_vector(adapter->q_vector[0], 0);
5570
5571        if (adapter->flags & IGC_FLAG_HAS_MSI) {
5572                err = request_irq(pdev->irq, &igc_intr_msi, 0,
5573                                  netdev->name, adapter);
5574                if (!err)
5575                        goto request_done;
5576
5577                /* fall back to legacy interrupts */
5578                igc_reset_interrupt_capability(adapter);
5579                adapter->flags &= ~IGC_FLAG_HAS_MSI;
5580        }
5581
5582        err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
5583                          netdev->name, adapter);
5584
5585        if (err)
5586                netdev_err(netdev, "Error %d getting interrupt\n", err);
5587
5588request_done:
5589        return err;
5590}
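
/* Interrupt setup therefore degrades gracefully: MSI-X is tried first; if it
 * cannot be requested, the queue/vector layout is rebuilt for a single vector
 * and MSI is tried; a shared legacy IRQ is the last resort.
 */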
5591
5592/**
5593 * __igc_open - Called when a network interface is made active
5594 * @netdev: network interface device structure
5595 * @resuming: boolean indicating if the device is resuming
5596 *
5597 * Returns 0 on success, negative value on failure
5598 *
5599 * The open entry point is called when a network interface is made
5600 * active by the system (IFF_UP).  At this point all resources needed
5601 * for transmit and receive operations are allocated, the interrupt
5602 * handler is registered with the OS, the watchdog timer is started,
5603 * and the stack is notified that the interface is ready.
5604 */
5605static int __igc_open(struct net_device *netdev, bool resuming)
5606{
5607        struct igc_adapter *adapter = netdev_priv(netdev);
5608        struct pci_dev *pdev = adapter->pdev;
5609        struct igc_hw *hw = &adapter->hw;
5610        int err = 0;
5611        int i = 0;
5612
5613        /* disallow open during test */
5614
5615        if (test_bit(__IGC_TESTING, &adapter->state)) {
5616                WARN_ON(resuming);
5617                return -EBUSY;
5618        }
5619
5620        if (!resuming)
5621                pm_runtime_get_sync(&pdev->dev);
5622
5623        netif_carrier_off(netdev);
5624
5625        /* allocate transmit descriptors */
5626        err = igc_setup_all_tx_resources(adapter);
5627        if (err)
5628                goto err_setup_tx;
5629
5630        /* allocate receive descriptors */
5631        err = igc_setup_all_rx_resources(adapter);
5632        if (err)
5633                goto err_setup_rx;
5634
5635        igc_power_up_link(adapter);
5636
5637        igc_configure(adapter);
5638
5639        err = igc_request_irq(adapter);
5640        if (err)
5641                goto err_req_irq;
5642
5643        /* Notify the stack of the actual queue counts. */
5644        err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
5645        if (err)
5646                goto err_set_queues;
5647
5648        err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
5649        if (err)
5650                goto err_set_queues;
5651
5652        clear_bit(__IGC_DOWN, &adapter->state);
5653
5654        for (i = 0; i < adapter->num_q_vectors; i++)
5655                napi_enable(&adapter->q_vector[i]->napi);
5656
5657        /* Clear any pending interrupts. */
5658        rd32(IGC_ICR);
5659        igc_irq_enable(adapter);
5660
5661        if (!resuming)
5662                pm_runtime_put(&pdev->dev);
5663
5664        netif_tx_start_all_queues(netdev);
5665
5666        /* start the watchdog. */
5667        hw->mac.get_link_status = true;
5668        schedule_work(&adapter->watchdog_task);
5669
5670        return IGC_SUCCESS;
5671
5672err_set_queues:
5673        igc_free_irq(adapter);
5674err_req_irq:
5675        igc_release_hw_control(adapter);
5676        igc_power_down_phy_copper_base(&adapter->hw);
5677        igc_free_all_rx_resources(adapter);
5678err_setup_rx:
5679        igc_free_all_tx_resources(adapter);
5680err_setup_tx:
5681        igc_reset(adapter);
5682        if (!resuming)
5683                pm_runtime_put(&pdev->dev);
5684
5685        return err;
5686}
5687
5688int igc_open(struct net_device *netdev)
5689{
5690        return __igc_open(netdev, false);
5691}
5692
5693/**
5694 * __igc_close - Disables a network interface
5695 * @netdev: network interface device structure
5696 * @suspending: boolean indicating the device is suspending
5697 *
5698 * Returns 0, this is not allowed to fail
5699 *
5700 * The close entry point is called when an interface is de-activated
5701 * by the OS.  The hardware is still under the driver's control, but
5702 * needs to be disabled.  A global MAC reset is issued to stop the
5703 * hardware, and all transmit and receive resources are freed.
5704 */
5705static int __igc_close(struct net_device *netdev, bool suspending)
5706{
5707        struct igc_adapter *adapter = netdev_priv(netdev);
5708        struct pci_dev *pdev = adapter->pdev;
5709
5710        WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
5711
5712        if (!suspending)
5713                pm_runtime_get_sync(&pdev->dev);
5714
5715        igc_down(adapter);
5716
5717        igc_release_hw_control(adapter);
5718
5719        igc_free_irq(adapter);
5720
5721        igc_free_all_tx_resources(adapter);
5722        igc_free_all_rx_resources(adapter);
5723
5724        if (!suspending)
5725                pm_runtime_put_sync(&pdev->dev);
5726
5727        return 0;
5728}
5729
5730int igc_close(struct net_device *netdev)
5731{
5732        if (netif_device_present(netdev) || netdev->dismantle)
5733                return __igc_close(netdev, false);
5734        return 0;
5735}
5736
5737/**
5738 * igc_ioctl - Access the hwtstamp interface
5739 * @netdev: network interface device structure
5740 * @ifr: interface request data
5741 * @cmd: ioctl command
5742 **/
5743static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5744{
5745        switch (cmd) {
5746        case SIOCGHWTSTAMP:
5747                return igc_ptp_get_ts_config(netdev, ifr);
5748        case SIOCSHWTSTAMP:
5749                return igc_ptp_set_ts_config(netdev, ifr);
5750        default:
5751                return -EOPNOTSUPP;
5752        }
5753}
5754
5755static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
5756                                      bool enable)
5757{
5758        struct igc_ring *ring;
5759
5760        if (queue < 0 || queue >= adapter->num_tx_queues)
5761                return -EINVAL;
5762
5763        ring = adapter->tx_ring[queue];
5764        ring->launchtime_enable = enable;
5765
5766        return 0;
5767}
5768
5769static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now)
5770{
5771        struct timespec64 b;
5772
5773        b = ktime_to_timespec64(base_time);
5774
5775        return timespec64_compare(now, &b) > 0;
5776}
5777
5778static bool validate_schedule(struct igc_adapter *adapter,
5779                              const struct tc_taprio_qopt_offload *qopt)
5780{
5781        int queue_uses[IGC_MAX_TX_QUEUES] = { };
5782        struct timespec64 now;
5783        size_t n;
5784
5785        if (qopt->cycle_time_extension)
5786                return false;
5787
5788        igc_ptp_read(adapter, &now);
5789
5790        /* If we program the controller's BASET registers with a time
5791         * in the future, it will hold all the packets until that
5792         * time, causing a lot of TX Hangs, so to avoid that, we
5793         * reject schedules that would start in the future.
5794         */
5795        if (!is_base_time_past(qopt->base_time, &now))
5796                return false;
5797
5798        for (n = 0; n < qopt->num_entries; n++) {
5799                const struct tc_taprio_sched_entry *e;
5800                int i;
5801
5802                e = &qopt->entries[n];
5803
5804                /* i225 only supports "global" frame preemption
5805                 * settings.
5806                 */
5807                if (e->command != TC_TAPRIO_CMD_SET_GATES)
5808                        return false;
5809
5810                for (i = 0; i < adapter->num_tx_queues; i++) {
5811                        if (e->gate_mask & BIT(i))
5812                                queue_uses[i]++;
5813
5814                        if (queue_uses[i] > 1)
5815                                return false;
5816                }
5817        }
5818
5819        return true;
5820}
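
/* Illustrative only: a schedule that passes the checks above is typically
 * installed from user space with tc's taprio qdisc in full-offload mode
 * (flags 0x2).  The interface name and numbers below are examples, and note
 * that this driver revision rejects a base-time that lies in the future:
 *
 *   tc qdisc replace dev eth0 parent root handle 100 taprio \
 *       num_tc 4 map 3 2 1 0 3 3 3 3 3 3 3 3 3 3 3 3 \
 *       queues 1@0 1@1 1@2 1@3 \
 *       base-time 1000000 cycle-time 1000000 \
 *       sched-entry S 01 300000 \
 *       sched-entry S 02 300000 \
 *       sched-entry S 0c 400000 \
 *       flags 0x2
 *
 * Each sched-entry opens the gates in its mask for the given interval in
 * nanoseconds; the intervals sum to the cycle time and no queue's gate is
 * opened by more than one entry, as validate_schedule() requires.
 */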
5821
5822static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
5823                                     struct tc_etf_qopt_offload *qopt)
5824{
5825        struct igc_hw *hw = &adapter->hw;
5826        int err;
5827
5828        if (hw->mac.type != igc_i225)
5829                return -EOPNOTSUPP;
5830
5831        err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable);
5832        if (err)
5833                return err;
5834
5835        return igc_tsn_offload_apply(adapter);
5836}
5837
5838static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
5839{
5840        int i;
5841
5842        adapter->base_time = 0;
5843        adapter->cycle_time = NSEC_PER_SEC;
5844
5845        for (i = 0; i < adapter->num_tx_queues; i++) {
5846                struct igc_ring *ring = adapter->tx_ring[i];
5847
5848                ring->start_time = 0;
5849                ring->end_time = NSEC_PER_SEC;
5850        }
5851
5852        return 0;
5853}
5854
5855static int igc_save_qbv_schedule(struct igc_adapter *adapter,
5856                                 struct tc_taprio_qopt_offload *qopt)
5857{
5858        u32 start_time = 0, end_time = 0;
5859        size_t n;
5860
5861        if (!qopt->enable)
5862                return igc_tsn_clear_schedule(adapter);
5863
5864        if (adapter->base_time)
5865                return -EALREADY;
5866
5867        if (!validate_schedule(adapter, qopt))
5868                return -EINVAL;
5869
5870        adapter->cycle_time = qopt->cycle_time;
5871        adapter->base_time = qopt->base_time;
5872
5873        /* FIXME: be a little smarter about cases when the gate for a
5874         * queue stays open for more than one entry.
5875         */
5876        for (n = 0; n < qopt->num_entries; n++) {
5877                struct tc_taprio_sched_entry *e = &qopt->entries[n];
5878                int i;
5879
5880                end_time += e->interval;
5881
5882                for (i = 0; i < adapter->num_tx_queues; i++) {
5883                        struct igc_ring *ring = adapter->tx_ring[i];
5884
5885                        if (!(e->gate_mask & BIT(i)))
5886                                continue;
5887
5888                        ring->start_time = start_time;
5889                        ring->end_time = end_time;
5890                }
5891
5892                start_time += e->interval;
5893        }
5894
5895        return 0;
5896}
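
/* Worked example for the loop above (hypothetical numbers): with three
 * entries of 300000 ns, 300000 ns and 400000 ns whose gate masks cover
 * queue 0, queue 1 and queues 2-3 respectively, the rings end up with
 * queue 0: start 0, end 300000; queue 1: start 300000, end 600000;
 * queues 2-3: start 600000, end 1000000 (all offsets within the cycle).
 */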
5897
5898static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
5899                                         struct tc_taprio_qopt_offload *qopt)
5900{
5901        struct igc_hw *hw = &adapter->hw;
5902        int err;
5903
5904        if (hw->mac.type != igc_i225)
5905                return -EOPNOTSUPP;
5906
5907        err = igc_save_qbv_schedule(adapter, qopt);
5908        if (err)
5909                return err;
5910
5911        return igc_tsn_offload_apply(adapter);
5912}
5913
5914static int igc_save_cbs_params(struct igc_adapter *adapter, int queue,
5915                               bool enable, int idleslope, int sendslope,
5916                               int hicredit, int locredit)
5917{
5918        bool cbs_status[IGC_MAX_SR_QUEUES] = { false };
5919        struct net_device *netdev = adapter->netdev;
5920        struct igc_ring *ring;
5921        int i;
5922
5923        /* i225 has two sets of credit-based shaper logic, so CBS is
5924         * supported only on the two highest priority queues (0 and 1).
5925         */
5926        if (queue < 0 || queue > 1)
5927                return -EINVAL;
5928
5929        ring = adapter->tx_ring[queue];
5930
5931        for (i = 0; i < IGC_MAX_SR_QUEUES; i++)
5932                if (adapter->tx_ring[i])
5933                        cbs_status[i] = adapter->tx_ring[i]->cbs_enable;
5934
5935        /* CBS should be enabled on the highest priority queue first in order
5936         * for the CBS algorithm to operate as intended.
5937         */
5938        if (enable) {
5939                if (queue == 1 && !cbs_status[0]) {
5940                        netdev_err(netdev,
5941                                   "Enabling CBS on queue1 before queue0\n");
5942                        return -EINVAL;
5943                }
5944        } else {
5945                if (queue == 0 && cbs_status[1]) {
5946                        netdev_err(netdev,
5947                                   "Disabling CBS on queue0 before queue1\n");
5948                        return -EINVAL;
5949                }
5950        }
5951
5952        ring->cbs_enable = enable;
5953        ring->idleslope = idleslope;
5954        ring->sendslope = sendslope;
5955        ring->hicredit = hicredit;
5956        ring->locredit = locredit;
5957
5958        return 0;
5959}
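
/* The CBS parameters follow the tc-cbs offload convention: idleslope and
 * sendslope are in kilobits per second, hicredit and locredit in bytes.  An
 * illustrative user-space request (example values for a ~20 Mbit/s reserved
 * stream on queue 0, assuming an mqprio/taprio root with handle 100):
 *
 *   tc qdisc replace dev eth0 parent 100:1 cbs \
 *       idleslope 20000 sendslope -980000 hicredit 30 locredit -1470 \
 *       offload 1
 */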
5960
5961static int igc_tsn_enable_cbs(struct igc_adapter *adapter,
5962                              struct tc_cbs_qopt_offload *qopt)
5963{
5964        struct igc_hw *hw = &adapter->hw;
5965        int err;
5966
5967        if (hw->mac.type != igc_i225)
5968                return -EOPNOTSUPP;
5969
5970        if (qopt->queue < 0 || qopt->queue > 1)
5971                return -EINVAL;
5972
5973        err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable,
5974                                  qopt->idleslope, qopt->sendslope,
5975                                  qopt->hicredit, qopt->locredit);
5976        if (err)
5977                return err;
5978
5979        return igc_tsn_offload_apply(adapter);
5980}
5981
5982static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
5983                        void *type_data)
5984{
5985        struct igc_adapter *adapter = netdev_priv(dev);
5986
5987        switch (type) {
5988        case TC_SETUP_QDISC_TAPRIO:
5989                return igc_tsn_enable_qbv_scheduling(adapter, type_data);
5990
5991        case TC_SETUP_QDISC_ETF:
5992                return igc_tsn_enable_launchtime(adapter, type_data);
5993
5994        case TC_SETUP_QDISC_CBS:
5995                return igc_tsn_enable_cbs(adapter, type_data);
5996
5997        default:
5998                return -EOPNOTSUPP;
5999        }
6000}
6001
6002static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
6003{
6004        struct igc_adapter *adapter = netdev_priv(dev);
6005
6006        switch (bpf->command) {
6007        case XDP_SETUP_PROG:
6008                return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
6009        case XDP_SETUP_XSK_POOL:
6010                return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
6011                                          bpf->xsk.queue_id);
6012        default:
6013                return -EOPNOTSUPP;
6014        }
6015}
6016
6017static int igc_xdp_xmit(struct net_device *dev, int num_frames,
6018                        struct xdp_frame **frames, u32 flags)
6019{
6020        struct igc_adapter *adapter = netdev_priv(dev);
6021        int cpu = smp_processor_id();
6022        struct netdev_queue *nq;
6023        struct igc_ring *ring;
6024        int i, drops;
6025
6026        if (unlikely(test_bit(__IGC_DOWN, &adapter->state)))
6027                return -ENETDOWN;
6028
6029        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
6030                return -EINVAL;
6031
6032        ring = igc_xdp_get_tx_ring(adapter, cpu);
6033        nq = txring_txq(ring);
6034
6035        __netif_tx_lock(nq, cpu);
6036
6037        drops = 0;
6038        for (i = 0; i < num_frames; i++) {
6039                int err;
6040                struct xdp_frame *xdpf = frames[i];
6041
6042                err = igc_xdp_init_tx_descriptor(ring, xdpf);
6043                if (err) {
6044                        xdp_return_frame_rx_napi(xdpf);
6045                        drops++;
6046                }
6047        }
6048
6049        if (flags & XDP_XMIT_FLUSH)
6050                igc_flush_tx_descriptors(ring);
6051
6052        __netif_tx_unlock(nq);
6053
6054        return num_frames - drops;
6055}
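
/* The value returned above is the number of frames actually queued; frames
 * that could not be turned into descriptors are handed back to the XDP
 * memory model via xdp_return_frame_rx_napi() and counted as drops.
 */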
6056
6057static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
6058                                        struct igc_q_vector *q_vector)
6059{
6060        struct igc_hw *hw = &adapter->hw;
6061        u32 eics = 0;
6062
6063        eics |= q_vector->eims_value;
6064        wr32(IGC_EICS, eics);
6065}
6066
6067int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
6068{
6069        struct igc_adapter *adapter = netdev_priv(dev);
6070        struct igc_q_vector *q_vector;
6071        struct igc_ring *ring;
6072
6073        if (test_bit(__IGC_DOWN, &adapter->state))
6074                return -ENETDOWN;
6075
6076        if (!igc_xdp_is_enabled(adapter))
6077                return -ENXIO;
6078
6079        if (queue_id >= adapter->num_rx_queues)
6080                return -EINVAL;
6081
6082        ring = adapter->rx_ring[queue_id];
6083
6084        if (!ring->xsk_pool)
6085                return -ENXIO;
6086
6087        q_vector = adapter->q_vector[queue_id];
6088        if (!napi_if_scheduled_mark_missed(&q_vector->napi))
6089                igc_trigger_rxtxq_interrupt(adapter, q_vector);
6090
6091        return 0;
6092}
6093
6094static const struct net_device_ops igc_netdev_ops = {
6095        .ndo_open               = igc_open,
6096        .ndo_stop               = igc_close,
6097        .ndo_start_xmit         = igc_xmit_frame,
6098        .ndo_set_rx_mode        = igc_set_rx_mode,
6099        .ndo_set_mac_address    = igc_set_mac,
6100        .ndo_change_mtu         = igc_change_mtu,
6101        .ndo_get_stats64        = igc_get_stats64,
6102        .ndo_fix_features       = igc_fix_features,
6103        .ndo_set_features       = igc_set_features,
6104        .ndo_features_check     = igc_features_check,
6105        .ndo_eth_ioctl          = igc_ioctl,
6106        .ndo_setup_tc           = igc_setup_tc,
6107        .ndo_bpf                = igc_bpf,
6108        .ndo_xdp_xmit           = igc_xdp_xmit,
6109        .ndo_xsk_wakeup         = igc_xsk_wakeup,
6110};
6111
6112/* PCIe configuration access */
6113void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
6114{
6115        struct igc_adapter *adapter = hw->back;
6116
6117        pci_read_config_word(adapter->pdev, reg, value);
6118}
6119
6120void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
6121{
6122        struct igc_adapter *adapter = hw->back;
6123
6124        pci_write_config_word(adapter->pdev, reg, *value);
6125}
6126
6127s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
6128{
6129        struct igc_adapter *adapter = hw->back;
6130
6131        if (!pci_is_pcie(adapter->pdev))
6132                return -IGC_ERR_CONFIG;
6133
6134        pcie_capability_read_word(adapter->pdev, reg, value);
6135
6136        return IGC_SUCCESS;
6137}
6138
6139s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
6140{
6141        struct igc_adapter *adapter = hw->back;
6142
6143        if (!pci_is_pcie(adapter->pdev))
6144                return -IGC_ERR_CONFIG;
6145
6146        pcie_capability_write_word(adapter->pdev, reg, *value);
6147
6148        return IGC_SUCCESS;
6149}
6150
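/* igc_rd32 - MMIO register read with surprise-removal detection
 *
 * A read returning all 1s (while a read of register 0 also returns all 1s)
 * indicates the device has dropped off the PCIe bus; in that case the
 * mapping is invalidated and the netdev detached.
 */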
6151u32 igc_rd32(struct igc_hw *hw, u32 reg)
6152{
6153        struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
6154        u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
6155        u32 value = 0;
6156
6157        value = readl(&hw_addr[reg]);
6158
6159        /* reads should not return all F's */
6160        if (!(~value) && (!reg || !(~readl(hw_addr)))) {
6161                struct net_device *netdev = igc->netdev;
6162
6163                hw->hw_addr = NULL;
6164                netif_device_detach(netdev);
6165                netdev_err(netdev, "PCIe link lost, device now detached\n");
6166                WARN(pci_device_is_present(igc->pdev),
6167                     "igc: Failed to read reg 0x%x!\n", reg);
6168        }
6169
6170        return value;
6171}
6172
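/* igc_set_spd_dplx - force speed and duplex from a user request
 *
 * SPEED_* values are even and DUPLEX_* values are 0 or 1, so spd + dplx is
 * unique per combination and can be switched on directly. 1000 and 2500 Mbps
 * full duplex are configured through autonegotiation advertisement instead of
 * being forced.
 */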
6173int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
6174{
6175        struct igc_mac_info *mac = &adapter->hw.mac;
6176
6177        mac->autoneg = false;
6178
6179        /* Make sure dplx is at most 1 bit and lsb of speed is not set
6180         * for the switch() below to work
6181         */
6182        if ((spd & 1) || (dplx & ~1))
6183                goto err_inval;
6184
6185        switch (spd + dplx) {
6186        case SPEED_10 + DUPLEX_HALF:
6187                mac->forced_speed_duplex = ADVERTISE_10_HALF;
6188                break;
6189        case SPEED_10 + DUPLEX_FULL:
6190                mac->forced_speed_duplex = ADVERTISE_10_FULL;
6191                break;
6192        case SPEED_100 + DUPLEX_HALF:
6193                mac->forced_speed_duplex = ADVERTISE_100_HALF;
6194                break;
6195        case SPEED_100 + DUPLEX_FULL:
6196                mac->forced_speed_duplex = ADVERTISE_100_FULL;
6197                break;
6198        case SPEED_1000 + DUPLEX_FULL:
6199                mac->autoneg = true;
6200                adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6201                break;
6202        case SPEED_1000 + DUPLEX_HALF: /* not supported */
6203                goto err_inval;
6204        case SPEED_2500 + DUPLEX_FULL:
6205                mac->autoneg = true;
6206                adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL;
6207                break;
6208        case SPEED_2500 + DUPLEX_HALF: /* not supported */
6209        default:
6210                goto err_inval;
6211        }
6212
6213        /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
6214        adapter->hw.phy.mdix = AUTO_ALL_MODES;
6215
6216        return 0;
6217
6218err_inval:
6219        netdev_err(adapter->netdev, "Unsupported Speed/Duplex configuration\n");
6220        return -EINVAL;
6221}
6222
6223/**
6224 * igc_probe - Device Initialization Routine
6225 * @pdev: PCI device information struct
6226 * @ent: entry in igc_pci_tbl
6227 *
6228 * Returns 0 on success, negative on failure
6229 *
6230 * igc_probe initializes an adapter identified by a pci_dev structure.
6231 * The OS initialization, configuration of the adapter private structure,
6232 * and a hardware reset occur.
6233 */
6234static int igc_probe(struct pci_dev *pdev,
6235                     const struct pci_device_id *ent)
6236{
6237        struct igc_adapter *adapter;
6238        struct net_device *netdev;
6239        struct igc_hw *hw;
6240        const struct igc_info *ei = igc_info_tbl[ent->driver_data];
6241        int err, pci_using_dac;
6242
6243        err = pci_enable_device_mem(pdev);
6244        if (err)
6245                return err;
6246
6247        pci_using_dac = 0;
6248        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
6249        if (!err) {
6250                pci_using_dac = 1;
6251        } else {
6252                err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
6253                if (err) {
6254                        dev_err(&pdev->dev,
6255                                "No usable DMA configuration, aborting\n");
6256                        goto err_dma;
6257                }
6258        }
6259
6260        err = pci_request_mem_regions(pdev, igc_driver_name);
6261        if (err)
6262                goto err_pci_reg;
6263
6264        pci_enable_pcie_error_reporting(pdev);
6265
6266        err = pci_enable_ptm(pdev, NULL);
6267        if (err < 0)
6268                dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");
6269
6270        pci_set_master(pdev);
6271
6272        err = -ENOMEM;
6273        netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
6274                                   IGC_MAX_TX_QUEUES);
6275
6276        if (!netdev)
6277                goto err_alloc_etherdev;
6278
6279        SET_NETDEV_DEV(netdev, &pdev->dev);
6280
6281        pci_set_drvdata(pdev, netdev);
6282        adapter = netdev_priv(netdev);
6283        adapter->netdev = netdev;
6284        adapter->pdev = pdev;
6285        hw = &adapter->hw;
6286        hw->back = adapter;
6287        adapter->port_num = hw->bus.func;
6288        adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
6289
6290        err = pci_save_state(pdev);
6291        if (err)
6292                goto err_ioremap;
6293
6294        err = -EIO;
6295        adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
6296                                   pci_resource_len(pdev, 0));
6297        if (!adapter->io_addr)
6298                goto err_ioremap;
6299
6300        /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
6301        hw->hw_addr = adapter->io_addr;
6302
6303        netdev->netdev_ops = &igc_netdev_ops;
6304        igc_ethtool_set_ops(netdev);
6305        netdev->watchdog_timeo = 5 * HZ;
6306
6307        netdev->mem_start = pci_resource_start(pdev, 0);
6308        netdev->mem_end = pci_resource_end(pdev, 0);
6309
6310        /* PCI config space info */
6311        hw->vendor_id = pdev->vendor;
6312        hw->device_id = pdev->device;
6313        hw->revision_id = pdev->revision;
6314        hw->subsystem_vendor_id = pdev->subsystem_vendor;
6315        hw->subsystem_device_id = pdev->subsystem_device;
6316
6317        /* Copy the default MAC and PHY function pointers */
6318        memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
6319        memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
6320
6321        /* Initialize skew-specific constants */
6322        err = ei->get_invariants(hw);
6323        if (err)
6324                goto err_sw_init;
6325
6326        /* Add supported features to the features list*/
6327        netdev->features |= NETIF_F_SG;
6328        netdev->features |= NETIF_F_TSO;
6329        netdev->features |= NETIF_F_TSO6;
6330        netdev->features |= NETIF_F_TSO_ECN;
6331        netdev->features |= NETIF_F_RXCSUM;
6332        netdev->features |= NETIF_F_HW_CSUM;
6333        netdev->features |= NETIF_F_SCTP_CRC;
6334        netdev->features |= NETIF_F_HW_TC;
6335
6336#define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
6337                                  NETIF_F_GSO_GRE_CSUM | \
6338                                  NETIF_F_GSO_IPXIP4 | \
6339                                  NETIF_F_GSO_IPXIP6 | \
6340                                  NETIF_F_GSO_UDP_TUNNEL | \
6341                                  NETIF_F_GSO_UDP_TUNNEL_CSUM)
6342
6343        netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES;
6344        netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES;
6345
6346        /* setup the private structure */
6347        err = igc_sw_init(adapter);
6348        if (err)
6349                goto err_sw_init;
6350
6351        /* copy netdev features into list of user selectable features */
6352        netdev->hw_features |= NETIF_F_NTUPLE;
6353        netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
6354        netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
6355        netdev->hw_features |= netdev->features;
6356
6357        if (pci_using_dac)
6358                netdev->features |= NETIF_F_HIGHDMA;
6359
6360        netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
6361        netdev->mpls_features |= NETIF_F_HW_CSUM;
6362        netdev->hw_enc_features |= netdev->vlan_features;
6363
6364        /* MTU range: 68 - 9216 */
6365        netdev->min_mtu = ETH_MIN_MTU;
6366        netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
6367
6368        /* before reading the NVM, reset the controller to put the device in a
6369         * known good starting state
6370         */
6371        hw->mac.ops.reset_hw(hw);
6372
6373        if (igc_get_flash_presence_i225(hw)) {
6374                if (hw->nvm.ops.validate(hw) < 0) {
6375                        dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
6376                        err = -EIO;
6377                        goto err_eeprom;
6378                }
6379        }
6380
6381        if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
6382                /* copy the MAC address out of the NVM */
6383                if (hw->mac.ops.read_mac_addr(hw))
6384                        dev_err(&pdev->dev, "NVM Read Error\n");
6385        }
6386
6387        eth_hw_addr_set(netdev, hw->mac.addr);
6388
6389        if (!is_valid_ether_addr(netdev->dev_addr)) {
6390                dev_err(&pdev->dev, "Invalid MAC Address\n");
6391                err = -EIO;
6392                goto err_eeprom;
6393        }
6394
6395        /* configure RXPBSIZE and TXPBSIZE */
6396        wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
6397        wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
6398
6399        timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
6400        timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
6401
6402        INIT_WORK(&adapter->reset_task, igc_reset_task);
6403        INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
6404
6405        /* Initialize link properties that are user-changeable */
6406        adapter->fc_autoneg = true;
6407        hw->mac.autoneg = true;
6408        hw->phy.autoneg_advertised = 0xaf;
6409
6410        hw->fc.requested_mode = igc_fc_default;
6411        hw->fc.current_mode = igc_fc_default;
6412
6413        /* By default, support wake on port A */
6414        adapter->flags |= IGC_FLAG_WOL_SUPPORTED;
6415
6416        /* initialize the wol settings based on the eeprom settings */
6417        if (adapter->flags & IGC_FLAG_WOL_SUPPORTED)
6418                adapter->wol |= IGC_WUFC_MAG;
6419
6420        device_set_wakeup_enable(&adapter->pdev->dev,
6421                                 adapter->flags & IGC_FLAG_WOL_SUPPORTED);
6422
6423        igc_ptp_init(adapter);
6424
6425        igc_tsn_clear_schedule(adapter);
6426
6427        /* reset the hardware with the new settings */
6428        igc_reset(adapter);
6429
6430        /* let the f/w know that the h/w is now under the control of the
6431         * driver.
6432         */
6433        igc_get_hw_control(adapter);
6434
6435        strncpy(netdev->name, "eth%d", IFNAMSIZ);
6436        err = register_netdev(netdev);
6437        if (err)
6438                goto err_register;
6439
6440        /* carrier off reporting is important to ethtool even BEFORE open */
6441        netif_carrier_off(netdev);
6442
6443        /* keep a copy of the board-specific info */
6444        adapter->ei = *ei;
6445
6446        /* print pcie link status and MAC address */
6447        pcie_print_link_status(pdev);
6448        netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
6449
6450        dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
6451        /* EEE is disabled by default */
6452        hw->dev_spec._base.eee_enable = false;
6453        adapter->flags &= ~IGC_FLAG_EEE;
6454        igc_set_eee_i225(hw, false, false, false);
6455
6456        pm_runtime_put_noidle(&pdev->dev);
6457
6458        return 0;
6459
6460err_register:
6461        igc_release_hw_control(adapter);
6462err_eeprom:
6463        if (!igc_check_reset_block(hw))
6464                igc_reset_phy(hw);
6465err_sw_init:
6466        igc_clear_interrupt_scheme(adapter);
6467        iounmap(adapter->io_addr);
6468err_ioremap:
6469        free_netdev(netdev);
6470err_alloc_etherdev:
6471        pci_disable_pcie_error_reporting(pdev);
6472        pci_release_mem_regions(pdev);
6473err_pci_reg:
6474err_dma:
6475        pci_disable_device(pdev);
6476        return err;
6477}
6478
6479/**
6480 * igc_remove - Device Removal Routine
6481 * @pdev: PCI device information struct
6482 *
6483 * igc_remove is called by the PCI subsystem to alert the driver
6484 * that it should release a PCI device.  This could be caused by a
6485 * Hot-Plug event, or because the driver is going to be removed from
6486 * memory.
6487 */
6488static void igc_remove(struct pci_dev *pdev)
6489{
6490        struct net_device *netdev = pci_get_drvdata(pdev);
6491        struct igc_adapter *adapter = netdev_priv(netdev);
6492
6493        pm_runtime_get_noresume(&pdev->dev);
6494
6495        igc_flush_nfc_rules(adapter);
6496
6497        igc_ptp_stop(adapter);
6498
6499        set_bit(__IGC_DOWN, &adapter->state);
6500
6501        del_timer_sync(&adapter->watchdog_timer);
6502        del_timer_sync(&adapter->phy_info_timer);
6503
6504        cancel_work_sync(&adapter->reset_task);
6505        cancel_work_sync(&adapter->watchdog_task);
6506
6507        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6508         * would have already happened in close and is redundant.
6509         */
6510        igc_release_hw_control(adapter);
6511        unregister_netdev(netdev);
6512
6513        igc_clear_interrupt_scheme(adapter);
6514        pci_iounmap(pdev, adapter->io_addr);
6515        pci_release_mem_regions(pdev);
6516
6517        free_netdev(netdev);
6518
6519        pci_disable_pcie_error_reporting(pdev);
6520
6521        pci_disable_device(pdev);
6522}
6523
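/* __igc_shutdown - common suspend/hibernate/shutdown path
 *
 * Detaches and closes the interface, then arms the wake-up filters (WUFC)
 * according to the configured Wake-on-LAN sources; runtime suspend only arms
 * wake on link change. If no wake source is armed, the copper PHY is powered
 * down. WoL itself is configured from user space, e.g.
 * "ethtool -s <iface> wol g" for magic-packet wake.
 */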
6524static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake,
6525                          bool runtime)
6526{
6527        struct net_device *netdev = pci_get_drvdata(pdev);
6528        struct igc_adapter *adapter = netdev_priv(netdev);
6529        u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol;
6530        struct igc_hw *hw = &adapter->hw;
6531        u32 ctrl, rctl, status;
6532        bool wake;
6533
6534        rtnl_lock();
6535        netif_device_detach(netdev);
6536
6537        if (netif_running(netdev))
6538                __igc_close(netdev, true);
6539
6540        igc_ptp_suspend(adapter);
6541
6542        igc_clear_interrupt_scheme(adapter);
6543        rtnl_unlock();
6544
6545        status = rd32(IGC_STATUS);
6546        if (status & IGC_STATUS_LU)
6547                wufc &= ~IGC_WUFC_LNKC;
6548
6549        if (wufc) {
6550                igc_setup_rctl(adapter);
6551                igc_set_rx_mode(netdev);
6552
6553                /* turn on all-multi mode if wake on multicast is enabled */
6554                if (wufc & IGC_WUFC_MC) {
6555                        rctl = rd32(IGC_RCTL);
6556                        rctl |= IGC_RCTL_MPE;
6557                        wr32(IGC_RCTL, rctl);
6558                }
6559
6560                ctrl = rd32(IGC_CTRL);
6561                ctrl |= IGC_CTRL_ADVD3WUC;
6562                wr32(IGC_CTRL, ctrl);
6563
6564                /* Allow time for pending master requests to run */
6565                igc_disable_pcie_master(hw);
6566
6567                wr32(IGC_WUC, IGC_WUC_PME_EN);
6568                wr32(IGC_WUFC, wufc);
6569        } else {
6570                wr32(IGC_WUC, 0);
6571                wr32(IGC_WUFC, 0);
6572        }
6573
6574        wake = wufc || adapter->en_mng_pt;
6575        if (!wake)
6576                igc_power_down_phy_copper_base(&adapter->hw);
6577        else
6578                igc_power_up_link(adapter);
6579
6580        if (enable_wake)
6581                *enable_wake = wake;
6582
6583        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6584         * would have already happened in close and is redundant.
6585         */
6586        igc_release_hw_control(adapter);
6587
6588        pci_disable_device(pdev);
6589
6590        return 0;
6591}
6592
6593#ifdef CONFIG_PM
6594static int __maybe_unused igc_runtime_suspend(struct device *dev)
6595{
6596        return __igc_shutdown(to_pci_dev(dev), NULL, 1);
6597}
6598
6599static void igc_deliver_wake_packet(struct net_device *netdev)
6600{
6601        struct igc_adapter *adapter = netdev_priv(netdev);
6602        struct igc_hw *hw = &adapter->hw;
6603        struct sk_buff *skb;
6604        u32 wupl;
6605
6606        wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;
6607
6608        /* WUPM stores only the first 128 bytes of the wake packet.
6609         * Read the packet only if we have the whole thing.
6610         */
6611        if (wupl == 0 || wupl > IGC_WUPM_BYTES)
6612                return;
6613
6614        skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES);
6615        if (!skb)
6616                return;
6617
6618        skb_put(skb, wupl);
6619
6620        /* Ensure reads are 32-bit aligned */
6621        wupl = roundup(wupl, 4);
6622
6623        memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl);
6624
6625        skb->protocol = eth_type_trans(skb, netdev);
6626        netif_rx(skb);
6627}
6628
6629static int __maybe_unused igc_resume(struct device *dev)
6630{
6631        struct pci_dev *pdev = to_pci_dev(dev);
6632        struct net_device *netdev = pci_get_drvdata(pdev);
6633        struct igc_adapter *adapter = netdev_priv(netdev);
6634        struct igc_hw *hw = &adapter->hw;
6635        u32 err, val;
6636
6637        pci_set_power_state(pdev, PCI_D0);
6638        pci_restore_state(pdev);
6639        pci_save_state(pdev);
6640
6641        if (!pci_device_is_present(pdev))
6642                return -ENODEV;
6643        err = pci_enable_device_mem(pdev);
6644        if (err) {
6645                netdev_err(netdev, "Cannot enable PCI device from suspend\n");
6646                return err;
6647        }
6648        pci_set_master(pdev);
6649
6650        pci_enable_wake(pdev, PCI_D3hot, 0);
6651        pci_enable_wake(pdev, PCI_D3cold, 0);
6652
6653        if (igc_init_interrupt_scheme(adapter, true)) {
6654                netdev_err(netdev, "Unable to allocate memory for queues\n");
6655                return -ENOMEM;
6656        }
6657
6658        igc_reset(adapter);
6659
6660        /* let the f/w know that the h/w is now under the control of the
6661         * driver.
6662         */
6663        igc_get_hw_control(adapter);
6664
6665        val = rd32(IGC_WUS);
6666        if (val & WAKE_PKT_WUS)
6667                igc_deliver_wake_packet(netdev);
6668
6669        wr32(IGC_WUS, ~0);
6670
6671        rtnl_lock();
6672        if (!err && netif_running(netdev))
6673                err = __igc_open(netdev, true);
6674
6675        if (!err)
6676                netif_device_attach(netdev);
6677        rtnl_unlock();
6678
6679        return err;
6680}
6681
6682static int __maybe_unused igc_runtime_resume(struct device *dev)
6683{
6684        return igc_resume(dev);
6685}
6686
6687static int __maybe_unused igc_suspend(struct device *dev)
6688{
6689        return __igc_shutdown(to_pci_dev(dev), NULL, 0);
6690}
6691
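/* Runtime-PM idle callback: if there is no link, schedule a runtime suspend
 * five seconds from now. Returning -EBUSY prevents the PM core from
 * suspending the device immediately.
 */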
6692static int __maybe_unused igc_runtime_idle(struct device *dev)
6693{
6694        struct net_device *netdev = dev_get_drvdata(dev);
6695        struct igc_adapter *adapter = netdev_priv(netdev);
6696
6697        if (!igc_has_link(adapter))
6698                pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6699
6700        return -EBUSY;
6701}
6702#endif /* CONFIG_PM */
6703
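/* igc_shutdown - called at system shutdown/reboot
 *
 * Runs the common shutdown path and, when the system is powering off, arms
 * PCI wake as needed and puts the device into D3hot.
 */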
6704static void igc_shutdown(struct pci_dev *pdev)
6705{
6706        bool wake;
6707
6708        __igc_shutdown(pdev, &wake, 0);
6709
6710        if (system_state == SYSTEM_POWER_OFF) {
6711                pci_wake_from_d3(pdev, wake);
6712                pci_set_power_state(pdev, PCI_D3hot);
6713        }
6714}
6715
6716/**
6717 *  igc_io_error_detected - called when PCI error is detected
6718 *  @pdev: Pointer to PCI device
6719 *  @state: The current PCI connection state
6720 *
6721 *  This function is called after a PCI bus error affecting
6722 *  this device has been detected.
6723 **/
6724static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
6725                                              pci_channel_state_t state)
6726{
6727        struct net_device *netdev = pci_get_drvdata(pdev);
6728        struct igc_adapter *adapter = netdev_priv(netdev);
6729
6730        netif_device_detach(netdev);
6731
6732        if (state == pci_channel_io_perm_failure)
6733                return PCI_ERS_RESULT_DISCONNECT;
6734
6735        if (netif_running(netdev))
6736                igc_down(adapter);
6737        pci_disable_device(pdev);
6738
6739        /* Request a slot reset. */
6740        return PCI_ERS_RESULT_NEED_RESET;
6741}
6742
6743/**
6744 *  igc_io_slot_reset - called after the PCI bus has been reset.
6745 *  @pdev: Pointer to PCI device
6746 *
6747 *  Restart the card from scratch, as if from a cold-boot. Implementation
6748 *  resembles the first-half of the igc_resume routine.
6749 **/
6750static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
6751{
6752        struct net_device *netdev = pci_get_drvdata(pdev);
6753        struct igc_adapter *adapter = netdev_priv(netdev);
6754        struct igc_hw *hw = &adapter->hw;
6755        pci_ers_result_t result;
6756
6757        if (pci_enable_device_mem(pdev)) {
6758                netdev_err(netdev, "Could not re-enable PCI device after reset\n");
6759                result = PCI_ERS_RESULT_DISCONNECT;
6760        } else {
6761                pci_set_master(pdev);
6762                pci_restore_state(pdev);
6763                pci_save_state(pdev);
6764
6765                pci_enable_wake(pdev, PCI_D3hot, 0);
6766                pci_enable_wake(pdev, PCI_D3cold, 0);
6767
6768                /* In case of PCI error, adapter loses its HW address
6769                 * so we should re-assign it here.
6770                 */
6771                hw->hw_addr = adapter->io_addr;
6772
6773                igc_reset(adapter);
6774                wr32(IGC_WUS, ~0);
6775                result = PCI_ERS_RESULT_RECOVERED;
6776        }
6777
6778        return result;
6779}
6780
6781/**
6782 *  igc_io_resume - called when traffic can start to flow again.
6783 *  @pdev: Pointer to PCI device
6784 *
6785 *  This callback is called when the error recovery driver tells us that
6786 *  it's OK to resume normal operation. Implementation resembles the
6787 *  second-half of the igc_resume routine.
6788 */
6789static void igc_io_resume(struct pci_dev *pdev)
6790{
6791        struct net_device *netdev = pci_get_drvdata(pdev);
6792        struct igc_adapter *adapter = netdev_priv(netdev);
6793
6794        rtnl_lock();
6795        if (netif_running(netdev)) {
6796                if (igc_open(netdev)) {
6797                        netdev_err(netdev, "igc_open failed after reset\n");
                            rtnl_unlock();
6798                        return;
6799                }
6800        }
6801
6802        netif_device_attach(netdev);
6803
6804        /* let the f/w know that the h/w is now under the control of the
6805         * driver.
6806         */
6807        igc_get_hw_control(adapter);
6808        rtnl_unlock();
6809}
6810
6811static const struct pci_error_handlers igc_err_handler = {
6812        .error_detected = igc_io_error_detected,
6813        .slot_reset = igc_io_slot_reset,
6814        .resume = igc_io_resume,
6815};
6816
6817#ifdef CONFIG_PM
6818static const struct dev_pm_ops igc_pm_ops = {
6819        SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
6820        SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
6821                           igc_runtime_idle)
6822};
6823#endif
6824
6825static struct pci_driver igc_driver = {
6826        .name     = igc_driver_name,
6827        .id_table = igc_pci_tbl,
6828        .probe    = igc_probe,
6829        .remove   = igc_remove,
6830#ifdef CONFIG_PM
6831        .driver.pm = &igc_pm_ops,
6832#endif
6833        .shutdown = igc_shutdown,
6834        .err_handler = &igc_err_handler,
6835};
6836
6837/**
6838 * igc_reinit_queues - reinitialize the interrupt scheme and queues
6839 * @adapter: pointer to adapter structure
6840 */
6841int igc_reinit_queues(struct igc_adapter *adapter)
6842{
6843        struct net_device *netdev = adapter->netdev;
6844        int err = 0;
6845
6846        if (netif_running(netdev))
6847                igc_close(netdev);
6848
6849        igc_reset_interrupt_capability(adapter);
6850
6851        if (igc_init_interrupt_scheme(adapter, true)) {
6852                netdev_err(netdev, "Unable to allocate memory for queues\n");
6853                return -ENOMEM;
6854        }
6855
6856        if (netif_running(netdev))
6857                err = igc_open(netdev);
6858
6859        return err;
6860}
6861
6862/**
6863 * igc_get_hw_dev - return device
6864 * @hw: pointer to hardware structure
6865 *
6866 * used by hardware layer to print debugging information
6867 */
6868struct net_device *igc_get_hw_dev(struct igc_hw *hw)
6869{
6870        struct igc_adapter *adapter = hw->back;
6871
6872        return adapter->netdev;
6873}
6874
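/* Stop the RX queue in hardware: clear the queue-enable bit and request a
 * software flush so pending descriptors are drained.
 */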
6875static void igc_disable_rx_ring_hw(struct igc_ring *ring)
6876{
6877        struct igc_hw *hw = &ring->q_vector->adapter->hw;
6878        u8 idx = ring->reg_idx;
6879        u32 rxdctl;
6880
6881        rxdctl = rd32(IGC_RXDCTL(idx));
6882        rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
6883        rxdctl |= IGC_RXDCTL_SWFLUSH;
6884        wr32(IGC_RXDCTL(idx), rxdctl);
6885}
6886
6887void igc_disable_rx_ring(struct igc_ring *ring)
6888{
6889        igc_disable_rx_ring_hw(ring);
6890        igc_clean_rx_ring(ring);
6891}
6892
6893void igc_enable_rx_ring(struct igc_ring *ring)
6894{
6895        struct igc_adapter *adapter = ring->q_vector->adapter;
6896
6897        igc_configure_rx_ring(adapter, ring);
6898
6899        if (ring->xsk_pool)
6900                igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
6901        else
6902                igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
6903}
6904
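/* Stop the TX queue in hardware: clear the queue-enable bit and request a
 * software flush of the descriptor ring.
 */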
6905static void igc_disable_tx_ring_hw(struct igc_ring *ring)
6906{
6907        struct igc_hw *hw = &ring->q_vector->adapter->hw;
6908        u8 idx = ring->reg_idx;
6909        u32 txdctl;
6910
6911        txdctl = rd32(IGC_TXDCTL(idx));
6912        txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
6913        txdctl |= IGC_TXDCTL_SWFLUSH;
6914        wr32(IGC_TXDCTL(idx), txdctl);
6915}
6916
6917void igc_disable_tx_ring(struct igc_ring *ring)
6918{
6919        igc_disable_tx_ring_hw(ring);
6920        igc_clean_tx_ring(ring);
6921}
6922
6923void igc_enable_tx_ring(struct igc_ring *ring)
6924{
6925        struct igc_adapter *adapter = ring->q_vector->adapter;
6926
6927        igc_configure_tx_ring(adapter, ring);
6928}
6929
6930/**
6931 * igc_init_module - Driver Registration Routine
6932 *
6933 * igc_init_module is the first routine called when the driver is
6934 * loaded. All it does is register with the PCI subsystem.
6935 */
6936static int __init igc_init_module(void)
6937{
6938        int ret;
6939
6940        pr_info("%s\n", igc_driver_string);
6941        pr_info("%s\n", igc_copyright);
6942
6943        ret = pci_register_driver(&igc_driver);
6944        return ret;
6945}
6946
6947module_init(igc_init_module);
6948
6949/**
6950 * igc_exit_module - Driver Exit Cleanup Routine
6951 *
6952 * igc_exit_module is called just before the driver is removed
6953 * from memory.
6954 */
6955static void __exit igc_exit_module(void)
6956{
6957        pci_unregister_driver(&igc_driver);
6958}
6959
6960module_exit(igc_exit_module);
6961/* igc_main.c */
6962