linux/drivers/net/ethernet/intel/igc/igc_main.c
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c)  2018 Intel Corporation */
   3
   4#include <linux/module.h>
   5#include <linux/types.h>
   6#include <linux/if_vlan.h>
   7#include <linux/aer.h>
   8#include <linux/tcp.h>
   9#include <linux/udp.h>
  10#include <linux/ip.h>
  11#include <linux/pm_runtime.h>
  12#include <net/pkt_sched.h>
  13#include <linux/bpf_trace.h>
  14#include <net/xdp_sock_drv.h>
  15#include <linux/pci.h>
  16
  17#include <net/ipv6.h>
  18
  19#include "igc.h"
  20#include "igc_hw.h"
  21#include "igc_tsn.h"
  22#include "igc_xdp.h"
  23
  24#define DRV_SUMMARY     "Intel(R) 2.5G Ethernet Linux Driver"
  25
  26#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
  27
  28#define IGC_XDP_PASS            0
  29#define IGC_XDP_CONSUMED        BIT(0)
  30#define IGC_XDP_TX              BIT(1)
  31#define IGC_XDP_REDIRECT        BIT(2)
  32
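    /* A negative value selects the driver default (DEFAULT_MSG_ENABLE) when
     * passed to netif_msg_init() at probe time.
     */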
  33static int debug = -1;
  34
  35MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
  36MODULE_DESCRIPTION(DRV_SUMMARY);
  37MODULE_LICENSE("GPL v2");
  38module_param(debug, int, 0);
  39MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
  40
  41char igc_driver_name[] = "igc";
  42static const char igc_driver_string[] = DRV_SUMMARY;
  43static const char igc_copyright[] =
  44        "Copyright(c) 2018 Intel Corporation.";
  45
  46static const struct igc_info *igc_info_tbl[] = {
  47        [board_base] = &igc_base_info,
  48};
  49
  50static const struct pci_device_id igc_pci_tbl[] = {
  51        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
  52        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
  53        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
  54        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
  55        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
  56        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
  57        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base },
  58        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
  59        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base },
  60        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
  61        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
  62        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base },
  63        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base },
  64        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base },
  65        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base },
  66        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
  67        /* required last entry */
  68        {0, }
  69};
  70
  71MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
  72
  73enum latency_range {
  74        lowest_latency = 0,
  75        low_latency = 1,
  76        bulk_latency = 2,
  77        latency_invalid = 255
  78};
  79
  80void igc_reset(struct igc_adapter *adapter)
  81{
  82        struct net_device *dev = adapter->netdev;
  83        struct igc_hw *hw = &adapter->hw;
  84        struct igc_fc_info *fc = &hw->fc;
  85        u32 pba, hwm;
  86
  87        /* Repartition PBA for greater than 9k MTU if required */
  88        pba = IGC_PBA_34K;
  89
  90        /* flow control settings
  91         * The high water mark must be low enough to fit one full frame
  92         * after transmitting the pause frame.  As such we must have enough
  93         * space to allow for us to complete our current transmit and then
  94         * receive the frame that is in progress from the link partner.
  95         * Set it to:
  96         * - the full Rx FIFO size minus one full Tx plus one full Rx frame
  97         */
  98        hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
  99
 100        fc->high_water = hwm & 0xFFFFFFF0;      /* 16-byte granularity */
 101        fc->low_water = fc->high_water - 16;
 102        fc->pause_time = 0xFFFF;
 103        fc->send_xon = 1;
 104        fc->current_mode = fc->requested_mode;
 105
 106        hw->mac.ops.reset_hw(hw);
 107
 108        if (hw->mac.ops.init_hw(hw))
 109                netdev_err(dev, "Error on hardware initialization\n");
 110
 111        /* Re-establish EEE setting */
 112        igc_set_eee_i225(hw, true, true, true);
 113
 114        if (!netif_running(adapter->netdev))
 115                igc_power_down_phy_copper_base(&adapter->hw);
 116
 117        /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */
 118        wr32(IGC_VET, ETH_P_8021Q);
 119
 120        /* Re-enable PTP, where applicable. */
 121        igc_ptp_reset(adapter);
 122
 123        /* Re-enable TSN offloading, where applicable. */
 124        igc_tsn_reset(adapter);
 125
 126        igc_get_phy_info(hw);
 127}
 128
 129/**
 130 * igc_power_up_link - Power up the phy link
 131 * @adapter: address of board private structure
 132 */
 133static void igc_power_up_link(struct igc_adapter *adapter)
 134{
 135        igc_reset_phy(&adapter->hw);
 136
 137        igc_power_up_phy_copper(&adapter->hw);
 138
 139        igc_setup_link(&adapter->hw);
 140}
 141
 142/**
 143 * igc_release_hw_control - release control of the h/w to f/w
 144 * @adapter: address of board private structure
 145 *
 146 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 147 * For ASF and Pass Through versions of f/w this means that the
 148 * driver is no longer loaded.
 149 */
 150static void igc_release_hw_control(struct igc_adapter *adapter)
 151{
 152        struct igc_hw *hw = &adapter->hw;
 153        u32 ctrl_ext;
 154
 155        if (!pci_device_is_present(adapter->pdev))
 156                return;
 157
 158        /* Let firmware take over control of h/w */
 159        ctrl_ext = rd32(IGC_CTRL_EXT);
 160        wr32(IGC_CTRL_EXT,
 161             ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
 162}
 163
 164/**
 165 * igc_get_hw_control - get control of the h/w from f/w
 166 * @adapter: address of board private structure
 167 *
 168 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 169 * For ASF and Pass Through versions of f/w this means that
 170 * the driver is loaded.
 171 */
 172static void igc_get_hw_control(struct igc_adapter *adapter)
 173{
 174        struct igc_hw *hw = &adapter->hw;
 175        u32 ctrl_ext;
 176
 177        /* Let firmware know the driver has taken over */
 178        ctrl_ext = rd32(IGC_CTRL_EXT);
 179        wr32(IGC_CTRL_EXT,
 180             ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
 181}
 182
 183static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf)
 184{
 185        dma_unmap_single(dev, dma_unmap_addr(buf, dma),
 186                         dma_unmap_len(buf, len), DMA_TO_DEVICE);
 187
 188        dma_unmap_len_set(buf, len, 0);
 189}
 190
 191/**
 192 * igc_clean_tx_ring - Free Tx Buffers
 193 * @tx_ring: ring to be cleaned
 194 */
 195static void igc_clean_tx_ring(struct igc_ring *tx_ring)
 196{
 197        u16 i = tx_ring->next_to_clean;
 198        struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
 199        u32 xsk_frames = 0;
 200
 201        while (i != tx_ring->next_to_use) {
 202                union igc_adv_tx_desc *eop_desc, *tx_desc;
 203
 204                switch (tx_buffer->type) {
 205                case IGC_TX_BUFFER_TYPE_XSK:
 206                        xsk_frames++;
 207                        break;
 208                case IGC_TX_BUFFER_TYPE_XDP:
 209                        xdp_return_frame(tx_buffer->xdpf);
 210                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 211                        break;
 212                case IGC_TX_BUFFER_TYPE_SKB:
 213                        dev_kfree_skb_any(tx_buffer->skb);
 214                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 215                        break;
 216                default:
 217                        netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
 218                        break;
 219                }
 220
 221                /* check for eop_desc to determine the end of the packet */
 222                eop_desc = tx_buffer->next_to_watch;
 223                tx_desc = IGC_TX_DESC(tx_ring, i);
 224
 225                /* unmap remaining buffers */
 226                while (tx_desc != eop_desc) {
 227                        tx_buffer++;
 228                        tx_desc++;
 229                        i++;
 230                        if (unlikely(i == tx_ring->count)) {
 231                                i = 0;
 232                                tx_buffer = tx_ring->tx_buffer_info;
 233                                tx_desc = IGC_TX_DESC(tx_ring, 0);
 234                        }
 235
 236                        /* unmap any remaining paged data */
 237                        if (dma_unmap_len(tx_buffer, len))
 238                                igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 239                }
 240
 241                tx_buffer->next_to_watch = NULL;
 242
 243                /* move us one more past the eop_desc for start of next pkt */
 244                tx_buffer++;
 245                i++;
 246                if (unlikely(i == tx_ring->count)) {
 247                        i = 0;
 248                        tx_buffer = tx_ring->tx_buffer_info;
 249                }
 250        }
 251
 252        if (tx_ring->xsk_pool && xsk_frames)
 253                xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
 254
 255        /* reset BQL for queue */
 256        netdev_tx_reset_queue(txring_txq(tx_ring));
 257
 258        /* reset next_to_use and next_to_clean */
 259        tx_ring->next_to_use = 0;
 260        tx_ring->next_to_clean = 0;
 261}
 262
 263/**
 264 * igc_free_tx_resources - Free Tx Resources per Queue
 265 * @tx_ring: Tx descriptor ring for a specific queue
 266 *
 267 * Free all transmit software resources
 268 */
 269void igc_free_tx_resources(struct igc_ring *tx_ring)
 270{
 271        igc_clean_tx_ring(tx_ring);
 272
 273        vfree(tx_ring->tx_buffer_info);
 274        tx_ring->tx_buffer_info = NULL;
 275
 276        /* if not set, then don't free */
 277        if (!tx_ring->desc)
 278                return;
 279
 280        dma_free_coherent(tx_ring->dev, tx_ring->size,
 281                          tx_ring->desc, tx_ring->dma);
 282
 283        tx_ring->desc = NULL;
 284}
 285
 286/**
 287 * igc_free_all_tx_resources - Free Tx Resources for All Queues
 288 * @adapter: board private structure
 289 *
 290 * Free all transmit software resources
 291 */
 292static void igc_free_all_tx_resources(struct igc_adapter *adapter)
 293{
 294        int i;
 295
 296        for (i = 0; i < adapter->num_tx_queues; i++)
 297                igc_free_tx_resources(adapter->tx_ring[i]);
 298}
 299
 300/**
 301 * igc_clean_all_tx_rings - Free Tx Buffers for all queues
 302 * @adapter: board private structure
 303 */
 304static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
 305{
 306        int i;
 307
 308        for (i = 0; i < adapter->num_tx_queues; i++)
 309                if (adapter->tx_ring[i])
 310                        igc_clean_tx_ring(adapter->tx_ring[i]);
 311}
 312
 313/**
 314 * igc_setup_tx_resources - allocate Tx resources (Descriptors)
 315 * @tx_ring: tx descriptor ring (for a specific queue) to setup
 316 *
 317 * Return 0 on success, negative on failure
 318 */
 319int igc_setup_tx_resources(struct igc_ring *tx_ring)
 320{
 321        struct net_device *ndev = tx_ring->netdev;
 322        struct device *dev = tx_ring->dev;
 323        int size = 0;
 324
 325        size = sizeof(struct igc_tx_buffer) * tx_ring->count;
 326        tx_ring->tx_buffer_info = vzalloc(size);
 327        if (!tx_ring->tx_buffer_info)
 328                goto err;
 329
 330        /* round up to nearest 4K */
 331        tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
 332        tx_ring->size = ALIGN(tx_ring->size, 4096);
 333
 334        tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 335                                           &tx_ring->dma, GFP_KERNEL);
 336
 337        if (!tx_ring->desc)
 338                goto err;
 339
 340        tx_ring->next_to_use = 0;
 341        tx_ring->next_to_clean = 0;
 342
 343        return 0;
 344
 345err:
 346        vfree(tx_ring->tx_buffer_info);
 347        netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n");
 348        return -ENOMEM;
 349}
 350
 351/**
 352 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
 353 * @adapter: board private structure
 354 *
 355 * Return 0 on success, negative on failure
 356 */
 357static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
 358{
 359        struct net_device *dev = adapter->netdev;
 360        int i, err = 0;
 361
 362        for (i = 0; i < adapter->num_tx_queues; i++) {
 363                err = igc_setup_tx_resources(adapter->tx_ring[i]);
 364                if (err) {
 365                        netdev_err(dev, "Error on Tx queue %u setup\n", i);
 366                        for (i--; i >= 0; i--)
 367                                igc_free_tx_resources(adapter->tx_ring[i]);
 368                        break;
 369                }
 370        }
 371
 372        return err;
 373}
 374
 375static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
 376{
 377        u16 i = rx_ring->next_to_clean;
 378
 379        dev_kfree_skb(rx_ring->skb);
 380        rx_ring->skb = NULL;
 381
 382        /* Free all the Rx ring sk_buffs */
 383        while (i != rx_ring->next_to_alloc) {
 384                struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
 385
 386                /* Invalidate cache lines that may have been written to by
 387                 * device so that we avoid corrupting memory.
 388                 */
 389                dma_sync_single_range_for_cpu(rx_ring->dev,
 390                                              buffer_info->dma,
 391                                              buffer_info->page_offset,
 392                                              igc_rx_bufsz(rx_ring),
 393                                              DMA_FROM_DEVICE);
 394
 395                /* free resources associated with mapping */
 396                dma_unmap_page_attrs(rx_ring->dev,
 397                                     buffer_info->dma,
 398                                     igc_rx_pg_size(rx_ring),
 399                                     DMA_FROM_DEVICE,
 400                                     IGC_RX_DMA_ATTR);
 401                __page_frag_cache_drain(buffer_info->page,
 402                                        buffer_info->pagecnt_bias);
 403
 404                i++;
 405                if (i == rx_ring->count)
 406                        i = 0;
 407        }
 408}
 409
 410static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring)
 411{
 412        struct igc_rx_buffer *bi;
 413        u16 i;
 414
 415        for (i = 0; i < ring->count; i++) {
 416                bi = &ring->rx_buffer_info[i];
 417                if (!bi->xdp)
 418                        continue;
 419
 420                xsk_buff_free(bi->xdp);
 421                bi->xdp = NULL;
 422        }
 423}
 424
 425/**
 426 * igc_clean_rx_ring - Free Rx Buffers per Queue
 427 * @ring: ring to free buffers from
 428 */
 429static void igc_clean_rx_ring(struct igc_ring *ring)
 430{
 431        if (ring->xsk_pool)
 432                igc_clean_rx_ring_xsk_pool(ring);
 433        else
 434                igc_clean_rx_ring_page_shared(ring);
 435
 436        clear_ring_uses_large_buffer(ring);
 437
 438        ring->next_to_alloc = 0;
 439        ring->next_to_clean = 0;
 440        ring->next_to_use = 0;
 441}
 442
 443/**
 444 * igc_clean_all_rx_rings - Free Rx Buffers for all queues
 445 * @adapter: board private structure
 446 */
 447static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
 448{
 449        int i;
 450
 451        for (i = 0; i < adapter->num_rx_queues; i++)
 452                if (adapter->rx_ring[i])
 453                        igc_clean_rx_ring(adapter->rx_ring[i]);
 454}
 455
 456/**
 457 * igc_free_rx_resources - Free Rx Resources
 458 * @rx_ring: ring to clean the resources from
 459 *
 460 * Free all receive software resources
 461 */
 462void igc_free_rx_resources(struct igc_ring *rx_ring)
 463{
 464        igc_clean_rx_ring(rx_ring);
 465
 466        xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 467
 468        vfree(rx_ring->rx_buffer_info);
 469        rx_ring->rx_buffer_info = NULL;
 470
 471        /* if not set, then don't free */
 472        if (!rx_ring->desc)
 473                return;
 474
 475        dma_free_coherent(rx_ring->dev, rx_ring->size,
 476                          rx_ring->desc, rx_ring->dma);
 477
 478        rx_ring->desc = NULL;
 479}
 480
 481/**
 482 * igc_free_all_rx_resources - Free Rx Resources for All Queues
 483 * @adapter: board private structure
 484 *
 485 * Free all receive software resources
 486 */
 487static void igc_free_all_rx_resources(struct igc_adapter *adapter)
 488{
 489        int i;
 490
 491        for (i = 0; i < adapter->num_rx_queues; i++)
 492                igc_free_rx_resources(adapter->rx_ring[i]);
 493}
 494
 495/**
 496 * igc_setup_rx_resources - allocate Rx resources (Descriptors)
 497 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
 498 *
 499 * Returns 0 on success, negative on failure
 500 */
 501int igc_setup_rx_resources(struct igc_ring *rx_ring)
 502{
 503        struct net_device *ndev = rx_ring->netdev;
 504        struct device *dev = rx_ring->dev;
 505        u8 index = rx_ring->queue_index;
 506        int size, desc_len, res;
 507
 508        res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
 509                               rx_ring->q_vector->napi.napi_id);
 510        if (res < 0) {
 511                netdev_err(ndev, "Failed to register xdp_rxq index %u\n",
 512                           index);
 513                return res;
 514        }
 515
 516        size = sizeof(struct igc_rx_buffer) * rx_ring->count;
 517        rx_ring->rx_buffer_info = vzalloc(size);
 518        if (!rx_ring->rx_buffer_info)
 519                goto err;
 520
 521        desc_len = sizeof(union igc_adv_rx_desc);
 522
 523        /* Round up to nearest 4K */
 524        rx_ring->size = rx_ring->count * desc_len;
 525        rx_ring->size = ALIGN(rx_ring->size, 4096);
 526
 527        rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
 528                                           &rx_ring->dma, GFP_KERNEL);
 529
 530        if (!rx_ring->desc)
 531                goto err;
 532
 533        rx_ring->next_to_alloc = 0;
 534        rx_ring->next_to_clean = 0;
 535        rx_ring->next_to_use = 0;
 536
 537        return 0;
 538
 539err:
 540        xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 541        vfree(rx_ring->rx_buffer_info);
 542        rx_ring->rx_buffer_info = NULL;
 543        netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
 544        return -ENOMEM;
 545}
 546
 547/**
 548 * igc_setup_all_rx_resources - wrapper to allocate Rx resources
 549 *                                (Descriptors) for all queues
 550 * @adapter: board private structure
 551 *
 552 * Return 0 on success, negative on failure
 553 */
 554static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
 555{
 556        struct net_device *dev = adapter->netdev;
 557        int i, err = 0;
 558
 559        for (i = 0; i < adapter->num_rx_queues; i++) {
 560                err = igc_setup_rx_resources(adapter->rx_ring[i]);
 561                if (err) {
 562                        netdev_err(dev, "Error on Rx queue %u setup\n", i);
 563                        for (i--; i >= 0; i--)
 564                                igc_free_rx_resources(adapter->rx_ring[i]);
 565                        break;
 566                }
 567        }
 568
 569        return err;
 570}
 571
 572static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter,
 573                                              struct igc_ring *ring)
 574{
 575        if (!igc_xdp_is_enabled(adapter) ||
 576            !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags))
 577                return NULL;
 578
 579        return xsk_get_pool_from_qid(ring->netdev, ring->queue_index);
 580}
 581
 582/**
 583 * igc_configure_rx_ring - Configure a receive ring after Reset
 584 * @adapter: board private structure
 585 * @ring: receive ring to be configured
 586 *
 587 * Configure the Rx unit of the MAC after a reset.
 588 */
 589static void igc_configure_rx_ring(struct igc_adapter *adapter,
 590                                  struct igc_ring *ring)
 591{
 592        struct igc_hw *hw = &adapter->hw;
 593        union igc_adv_rx_desc *rx_desc;
 594        int reg_idx = ring->reg_idx;
 595        u32 srrctl = 0, rxdctl = 0;
 596        u64 rdba = ring->dma;
 597        u32 buf_size;
 598
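            /* Re-register the XDP memory model for this queue: zero-copy XSK
             * buffers when an AF_XDP pool is bound to it, shared pages
             * otherwise.
             */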
 599        xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
 600        ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
 601        if (ring->xsk_pool) {
 602                WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 603                                                   MEM_TYPE_XSK_BUFF_POOL,
 604                                                   NULL));
 605                xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
 606        } else {
 607                WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 608                                                   MEM_TYPE_PAGE_SHARED,
 609                                                   NULL));
 610        }
 611
 612        if (igc_xdp_is_enabled(adapter))
 613                set_ring_uses_large_buffer(ring);
 614
 615        /* disable the queue */
 616        wr32(IGC_RXDCTL(reg_idx), 0);
 617
 618        /* Set DMA base address registers */
 619        wr32(IGC_RDBAL(reg_idx),
 620             rdba & 0x00000000ffffffffULL);
 621        wr32(IGC_RDBAH(reg_idx), rdba >> 32);
 622        wr32(IGC_RDLEN(reg_idx),
 623             ring->count * sizeof(union igc_adv_rx_desc));
 624
 625        /* initialize head and tail */
 626        ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
 627        wr32(IGC_RDH(reg_idx), 0);
 628        writel(0, ring->tail);
 629
  630        /* reset next-to-use/clean to place SW in sync with hardware */
 631        ring->next_to_clean = 0;
 632        ring->next_to_use = 0;
 633
 634        if (ring->xsk_pool)
 635                buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
 636        else if (ring_uses_large_buffer(ring))
 637                buf_size = IGC_RXBUFFER_3072;
 638        else
 639                buf_size = IGC_RXBUFFER_2048;
 640
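            /* SRRCTL carries the header buffer size, the packet buffer size
             * (programmed in 1 KB units) and the one-buffer advanced
             * descriptor format for this queue.
             */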
 641        srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
 642        srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
 643        srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
 644
 645        wr32(IGC_SRRCTL(reg_idx), srrctl);
 646
 647        rxdctl |= IGC_RX_PTHRESH;
 648        rxdctl |= IGC_RX_HTHRESH << 8;
 649        rxdctl |= IGC_RX_WTHRESH << 16;
 650
 651        /* initialize rx_buffer_info */
 652        memset(ring->rx_buffer_info, 0,
 653               sizeof(struct igc_rx_buffer) * ring->count);
 654
 655        /* initialize Rx descriptor 0 */
 656        rx_desc = IGC_RX_DESC(ring, 0);
 657        rx_desc->wb.upper.length = 0;
 658
 659        /* enable receive descriptor fetching */
 660        rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
 661
 662        wr32(IGC_RXDCTL(reg_idx), rxdctl);
 663}
 664
 665/**
 666 * igc_configure_rx - Configure receive Unit after Reset
 667 * @adapter: board private structure
 668 *
 669 * Configure the Rx unit of the MAC after a reset.
 670 */
 671static void igc_configure_rx(struct igc_adapter *adapter)
 672{
 673        int i;
 674
 675        /* Setup the HW Rx Head and Tail Descriptor Pointers and
 676         * the Base and Length of the Rx Descriptor Ring
 677         */
 678        for (i = 0; i < adapter->num_rx_queues; i++)
 679                igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
 680}
 681
 682/**
 683 * igc_configure_tx_ring - Configure transmit ring after Reset
 684 * @adapter: board private structure
 685 * @ring: tx ring to configure
 686 *
 687 * Configure a transmit ring after a reset.
 688 */
 689static void igc_configure_tx_ring(struct igc_adapter *adapter,
 690                                  struct igc_ring *ring)
 691{
 692        struct igc_hw *hw = &adapter->hw;
 693        int reg_idx = ring->reg_idx;
 694        u64 tdba = ring->dma;
 695        u32 txdctl = 0;
 696
 697        ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
 698
 699        /* disable the queue */
 700        wr32(IGC_TXDCTL(reg_idx), 0);
 701        wrfl();
 702        mdelay(10);
 703
 704        wr32(IGC_TDLEN(reg_idx),
 705             ring->count * sizeof(union igc_adv_tx_desc));
 706        wr32(IGC_TDBAL(reg_idx),
 707             tdba & 0x00000000ffffffffULL);
 708        wr32(IGC_TDBAH(reg_idx), tdba >> 32);
 709
 710        ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
 711        wr32(IGC_TDH(reg_idx), 0);
 712        writel(0, ring->tail);
 713
 714        txdctl |= IGC_TX_PTHRESH;
 715        txdctl |= IGC_TX_HTHRESH << 8;
 716        txdctl |= IGC_TX_WTHRESH << 16;
 717
 718        txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
 719        wr32(IGC_TXDCTL(reg_idx), txdctl);
 720}
 721
 722/**
 723 * igc_configure_tx - Configure transmit Unit after Reset
 724 * @adapter: board private structure
 725 *
 726 * Configure the Tx unit of the MAC after a reset.
 727 */
 728static void igc_configure_tx(struct igc_adapter *adapter)
 729{
 730        int i;
 731
 732        for (i = 0; i < adapter->num_tx_queues; i++)
 733                igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
 734}
 735
 736/**
 737 * igc_setup_mrqc - configure the multiple receive queue control registers
 738 * @adapter: Board private structure
 739 */
 740static void igc_setup_mrqc(struct igc_adapter *adapter)
 741{
 742        struct igc_hw *hw = &adapter->hw;
 743        u32 j, num_rx_queues;
 744        u32 mrqc, rxcsum;
 745        u32 rss_key[10];
 746
 747        netdev_rss_key_fill(rss_key, sizeof(rss_key));
 748        for (j = 0; j < 10; j++)
 749                wr32(IGC_RSSRK(j), rss_key[j]);
 750
 751        num_rx_queues = adapter->rss_queues;
 752
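            /* Spread the indirection table entries evenly across the enabled
             * RSS queues (only when the queue count changed), then write the
             * table to hardware.
             */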
 753        if (adapter->rss_indir_tbl_init != num_rx_queues) {
 754                for (j = 0; j < IGC_RETA_SIZE; j++)
 755                        adapter->rss_indir_tbl[j] =
 756                        (j * num_rx_queues) / IGC_RETA_SIZE;
 757                adapter->rss_indir_tbl_init = num_rx_queues;
 758        }
 759        igc_write_rss_indir_tbl(adapter);
 760
 761        /* Disable raw packet checksumming so that RSS hash is placed in
 762         * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
 763         * offloads as they are enabled by default
 764         */
 765        rxcsum = rd32(IGC_RXCSUM);
 766        rxcsum |= IGC_RXCSUM_PCSD;
 767
 768        /* Enable Receive Checksum Offload for SCTP */
 769        rxcsum |= IGC_RXCSUM_CRCOFL;
 770
 771        /* Don't need to set TUOFL or IPOFL, they default to 1 */
 772        wr32(IGC_RXCSUM, rxcsum);
 773
 774        /* Generate RSS hash based on packet types, TCP/UDP
 775         * port numbers and/or IPv4/v6 src and dst addresses
 776         */
 777        mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
 778               IGC_MRQC_RSS_FIELD_IPV4_TCP |
 779               IGC_MRQC_RSS_FIELD_IPV6 |
 780               IGC_MRQC_RSS_FIELD_IPV6_TCP |
 781               IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
 782
 783        if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
 784                mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
 785        if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
 786                mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
 787
 788        mrqc |= IGC_MRQC_ENABLE_RSS_MQ;
 789
 790        wr32(IGC_MRQC, mrqc);
 791}
 792
 793/**
 794 * igc_setup_rctl - configure the receive control registers
 795 * @adapter: Board private structure
 796 */
 797static void igc_setup_rctl(struct igc_adapter *adapter)
 798{
 799        struct igc_hw *hw = &adapter->hw;
 800        u32 rctl;
 801
 802        rctl = rd32(IGC_RCTL);
 803
 804        rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
 805        rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
 806
 807        rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
 808                (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
 809
 810        /* enable stripping of CRC. Newer features require
 811         * that the HW strips the CRC.
 812         */
 813        rctl |= IGC_RCTL_SECRC;
 814
 815        /* disable store bad packets and clear size bits. */
 816        rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
 817
 818        /* enable LPE to allow for reception of jumbo frames */
 819        rctl |= IGC_RCTL_LPE;
 820
 821        /* disable queue 0 to prevent tail write w/o re-config */
 822        wr32(IGC_RXDCTL(0), 0);
 823
 824        /* This is useful for sniffing bad packets. */
 825        if (adapter->netdev->features & NETIF_F_RXALL) {
 826                /* UPE and MPE will be handled by normal PROMISC logic
 827                 * in set_rx_mode
 828                 */
 829                rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
 830                         IGC_RCTL_BAM | /* RX All Bcast Pkts */
 831                         IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
 832
 833                rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
 834                          IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
 835        }
 836
 837        wr32(IGC_RCTL, rctl);
 838}
 839
 840/**
 841 * igc_setup_tctl - configure the transmit control registers
 842 * @adapter: Board private structure
 843 */
 844static void igc_setup_tctl(struct igc_adapter *adapter)
 845{
 846        struct igc_hw *hw = &adapter->hw;
 847        u32 tctl;
 848
  849        /* disable queue 0 which could be enabled by default */
 850        wr32(IGC_TXDCTL(0), 0);
 851
 852        /* Program the Transmit Control Register */
 853        tctl = rd32(IGC_TCTL);
 854        tctl &= ~IGC_TCTL_CT;
 855        tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
 856                (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
 857
 858        /* Enable transmits */
 859        tctl |= IGC_TCTL_EN;
 860
 861        wr32(IGC_TCTL, tctl);
 862}
 863
 864/**
 865 * igc_set_mac_filter_hw() - Set MAC address filter in hardware
 866 * @adapter: Pointer to adapter where the filter should be set
 867 * @index: Filter index
 868 * @type: MAC address filter type (source or destination)
 869 * @addr: MAC address
 870 * @queue: If non-negative, queue assignment feature is enabled and frames
 871 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
 872 *         assignment is disabled.
 873 */
 874static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
 875                                  enum igc_mac_filter_type type,
 876                                  const u8 *addr, int queue)
 877{
 878        struct net_device *dev = adapter->netdev;
 879        struct igc_hw *hw = &adapter->hw;
 880        u32 ral, rah;
 881
 882        if (WARN_ON(index >= hw->mac.rar_entry_count))
 883                return;
 884
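            /* RAL takes the low 32 bits of the address, RAH the remaining
             * 16 bits plus the filter control flags set below.
             */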
 885        ral = le32_to_cpup((__le32 *)(addr));
 886        rah = le16_to_cpup((__le16 *)(addr + 4));
 887
 888        if (type == IGC_MAC_FILTER_TYPE_SRC) {
 889                rah &= ~IGC_RAH_ASEL_MASK;
 890                rah |= IGC_RAH_ASEL_SRC_ADDR;
 891        }
 892
 893        if (queue >= 0) {
 894                rah &= ~IGC_RAH_QSEL_MASK;
 895                rah |= (queue << IGC_RAH_QSEL_SHIFT);
 896                rah |= IGC_RAH_QSEL_ENABLE;
 897        }
 898
 899        rah |= IGC_RAH_AV;
 900
 901        wr32(IGC_RAL(index), ral);
 902        wr32(IGC_RAH(index), rah);
 903
 904        netdev_dbg(dev, "MAC address filter set in HW: index %d", index);
 905}
 906
 907/**
 908 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware
 909 * @adapter: Pointer to adapter where the filter should be cleared
 910 * @index: Filter index
 911 */
 912static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
 913{
 914        struct net_device *dev = adapter->netdev;
 915        struct igc_hw *hw = &adapter->hw;
 916
 917        if (WARN_ON(index >= hw->mac.rar_entry_count))
 918                return;
 919
 920        wr32(IGC_RAL(index), 0);
 921        wr32(IGC_RAH(index), 0);
 922
 923        netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index);
 924}
 925
 926/* Set default MAC address for the PF in the first RAR entry */
 927static void igc_set_default_mac_filter(struct igc_adapter *adapter)
 928{
 929        struct net_device *dev = adapter->netdev;
 930        u8 *addr = adapter->hw.mac.addr;
 931
 932        netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
 933
 934        igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1);
 935}
 936
 937/**
 938 * igc_set_mac - Change the Ethernet Address of the NIC
 939 * @netdev: network interface device structure
 940 * @p: pointer to an address structure
 941 *
 942 * Returns 0 on success, negative on failure
 943 */
 944static int igc_set_mac(struct net_device *netdev, void *p)
 945{
 946        struct igc_adapter *adapter = netdev_priv(netdev);
 947        struct igc_hw *hw = &adapter->hw;
 948        struct sockaddr *addr = p;
 949
 950        if (!is_valid_ether_addr(addr->sa_data))
 951                return -EADDRNOTAVAIL;
 952
 953        eth_hw_addr_set(netdev, addr->sa_data);
 954        memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
 955
 956        /* set the correct pool for the new PF MAC address in entry 0 */
 957        igc_set_default_mac_filter(adapter);
 958
 959        return 0;
 960}
 961
 962/**
 963 *  igc_write_mc_addr_list - write multicast addresses to MTA
 964 *  @netdev: network interface device structure
 965 *
 966 *  Writes multicast address list to the MTA hash table.
 967 *  Returns: -ENOMEM on failure
 968 *           0 on no addresses written
 969 *           X on writing X addresses to MTA
 970 **/
 971static int igc_write_mc_addr_list(struct net_device *netdev)
 972{
 973        struct igc_adapter *adapter = netdev_priv(netdev);
 974        struct igc_hw *hw = &adapter->hw;
 975        struct netdev_hw_addr *ha;
 976        u8  *mta_list;
 977        int i;
 978
 979        if (netdev_mc_empty(netdev)) {
 980                /* nothing to program, so clear mc list */
 981                igc_update_mc_addr_list(hw, NULL, 0);
 982                return 0;
 983        }
 984
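            /* one 6-byte (ETH_ALEN) entry per multicast address */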
 985        mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
 986        if (!mta_list)
 987                return -ENOMEM;
 988
 989        /* The shared function expects a packed array of only addresses. */
 990        i = 0;
 991        netdev_for_each_mc_addr(ha, netdev)
 992                memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
 993
 994        igc_update_mc_addr_list(hw, mta_list, i);
 995        kfree(mta_list);
 996
 997        return netdev_mc_count(netdev);
 998}
 999
1000static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime)
1001{
1002        ktime_t cycle_time = adapter->cycle_time;
1003        ktime_t base_time = adapter->base_time;
1004        u32 launchtime;
1005
1006        /* FIXME: when using ETF together with taprio, we may have a
1007         * case where 'delta' is larger than the cycle_time, this may
1008         * cause problems if we don't read the current value of
 1009         * IGC_BASET, as the value written into the launchtime
1010         * descriptor field may be misinterpreted.
1011         */
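            /* The launch time is the offset of txtime within the current Qbv
             * cycle: (txtime - base_time) mod cycle_time.  E.g. with
             * base_time 0, a 1 ms cycle and txtime = 2.5 ms, the programmed
             * value is 0.5 ms.
             */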
1012        div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime);
1013
1014        return cpu_to_le32(launchtime);
1015}
1016
1017static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
1018                            struct igc_tx_buffer *first,
1019                            u32 vlan_macip_lens, u32 type_tucmd,
1020                            u32 mss_l4len_idx)
1021{
1022        struct igc_adv_tx_context_desc *context_desc;
1023        u16 i = tx_ring->next_to_use;
1024
1025        context_desc = IGC_TX_CTXTDESC(tx_ring, i);
1026
1027        i++;
1028        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1029
1030        /* set bits to identify this as an advanced context descriptor */
1031        type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
1032
1033        /* For i225, context index must be unique per ring. */
1034        if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
1035                mss_l4len_idx |= tx_ring->reg_idx << 4;
1036
1037        context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
1038        context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
1039        context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
1040
1041        /* We assume there is always a valid Tx time available. Invalid times
1042         * should have been handled by the upper layers.
1043         */
1044        if (tx_ring->launchtime_enable) {
1045                struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1046                ktime_t txtime = first->skb->tstamp;
1047
1048                skb_txtime_consumed(first->skb);
1049                context_desc->launch_time = igc_tx_launchtime(adapter,
1050                                                              txtime);
1051        } else {
1052                context_desc->launch_time = 0;
1053        }
1054}
1055
1056static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
1057{
1058        struct sk_buff *skb = first->skb;
1059        u32 vlan_macip_lens = 0;
1060        u32 type_tucmd = 0;
1061
1062        if (skb->ip_summed != CHECKSUM_PARTIAL) {
1063csum_failed:
1064                if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
1065                    !tx_ring->launchtime_enable)
1066                        return;
1067                goto no_csum;
1068        }
1069
1070        switch (skb->csum_offset) {
1071        case offsetof(struct tcphdr, check):
1072                type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1073                fallthrough;
1074        case offsetof(struct udphdr, check):
1075                break;
1076        case offsetof(struct sctphdr, checksum):
1077                /* validate that this is actually an SCTP request */
1078                if (skb_csum_is_sctp(skb)) {
1079                        type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
1080                        break;
1081                }
1082                fallthrough;
1083        default:
1084                skb_checksum_help(skb);
1085                goto csum_failed;
1086        }
1087
1088        /* update TX checksum flag */
1089        first->tx_flags |= IGC_TX_FLAGS_CSUM;
1090        vlan_macip_lens = skb_checksum_start_offset(skb) -
1091                          skb_network_offset(skb);
1092no_csum:
1093        vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
1094        vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1095
1096        igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
1097}
1098
1099static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1100{
1101        struct net_device *netdev = tx_ring->netdev;
1102
1103        netif_stop_subqueue(netdev, tx_ring->queue_index);
1104
 1105        /* memory barrier: make the queue stop visible before re-checking */
1106        smp_mb();
1107
 1108        /* We need to check again in case another CPU has just
1109         * made room available.
1110         */
1111        if (igc_desc_unused(tx_ring) < size)
1112                return -EBUSY;
1113
1114        /* A reprieve! */
1115        netif_wake_subqueue(netdev, tx_ring->queue_index);
1116
1117        u64_stats_update_begin(&tx_ring->tx_syncp2);
1118        tx_ring->tx_stats.restart_queue2++;
1119        u64_stats_update_end(&tx_ring->tx_syncp2);
1120
1121        return 0;
1122}
1123
1124static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1125{
1126        if (igc_desc_unused(tx_ring) >= size)
1127                return 0;
1128        return __igc_maybe_stop_tx(tx_ring, size);
1129}
1130
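    /* Map a flag bit in _input onto the corresponding _result bit without
     * branching: the masked bit is scaled up by multiplication or down by
     * division depending on which of the two bits is higher.  For example,
     * IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN, IGC_ADVTXD_DCMD_VLE) yields
     * IGC_ADVTXD_DCMD_VLE when the VLAN flag is set and 0 otherwise.
     */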
1131#define IGC_SET_FLAG(_input, _flag, _result) \
1132        (((_flag) <= (_result)) ?                               \
1133         ((u32)((_input) & (_flag)) * ((_result) / (_flag))) :  \
1134         ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
1135
1136static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
1137{
1138        /* set type for advanced descriptor with frame checksum insertion */
1139        u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
1140                       IGC_ADVTXD_DCMD_DEXT |
1141                       IGC_ADVTXD_DCMD_IFCS;
1142
1143        /* set HW vlan bit if vlan is present */
1144        cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN,
1145                                 IGC_ADVTXD_DCMD_VLE);
1146
1147        /* set segmentation bits for TSO */
1148        cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO,
1149                                 (IGC_ADVTXD_DCMD_TSE));
1150
1151        /* set timestamp bit if present */
1152        cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
1153                                 (IGC_ADVTXD_MAC_TSTAMP));
1154
1155        /* insert frame checksum */
1156        cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS);
1157
1158        return cmd_type;
1159}
1160
1161static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
1162                                 union igc_adv_tx_desc *tx_desc,
1163                                 u32 tx_flags, unsigned int paylen)
1164{
1165        u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
1166
1167        /* insert L4 checksum */
1168        olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
1169                          ((IGC_TXD_POPTS_TXSM << 8) /
1170                          IGC_TX_FLAGS_CSUM);
1171
1172        /* insert IPv4 checksum */
1173        olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
1174                          (((IGC_TXD_POPTS_IXSM << 8)) /
1175                          IGC_TX_FLAGS_IPV4);
1176
1177        tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
1178}
1179
1180static int igc_tx_map(struct igc_ring *tx_ring,
1181                      struct igc_tx_buffer *first,
1182                      const u8 hdr_len)
1183{
1184        struct sk_buff *skb = first->skb;
1185        struct igc_tx_buffer *tx_buffer;
1186        union igc_adv_tx_desc *tx_desc;
1187        u32 tx_flags = first->tx_flags;
1188        skb_frag_t *frag;
1189        u16 i = tx_ring->next_to_use;
1190        unsigned int data_len, size;
1191        dma_addr_t dma;
1192        u32 cmd_type;
1193
1194        cmd_type = igc_tx_cmd_type(skb, tx_flags);
1195        tx_desc = IGC_TX_DESC(tx_ring, i);
1196
1197        igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
1198
1199        size = skb_headlen(skb);
1200        data_len = skb->data_len;
1201
1202        dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
1203
1204        tx_buffer = first;
1205
1206        for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
1207                if (dma_mapping_error(tx_ring->dev, dma))
1208                        goto dma_error;
1209
1210                /* record length, and DMA address */
1211                dma_unmap_len_set(tx_buffer, len, size);
1212                dma_unmap_addr_set(tx_buffer, dma, dma);
1213
1214                tx_desc->read.buffer_addr = cpu_to_le64(dma);
1215
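                    /* A single descriptor can carry at most
                     * IGC_MAX_DATA_PER_TXD bytes, so larger buffers are split
                     * across several descriptors.
                     */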
1216                while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
1217                        tx_desc->read.cmd_type_len =
1218                                cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
1219
1220                        i++;
1221                        tx_desc++;
1222                        if (i == tx_ring->count) {
1223                                tx_desc = IGC_TX_DESC(tx_ring, 0);
1224                                i = 0;
1225                        }
1226                        tx_desc->read.olinfo_status = 0;
1227
1228                        dma += IGC_MAX_DATA_PER_TXD;
1229                        size -= IGC_MAX_DATA_PER_TXD;
1230
1231                        tx_desc->read.buffer_addr = cpu_to_le64(dma);
1232                }
1233
1234                if (likely(!data_len))
1235                        break;
1236
1237                tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
1238
1239                i++;
1240                tx_desc++;
1241                if (i == tx_ring->count) {
1242                        tx_desc = IGC_TX_DESC(tx_ring, 0);
1243                        i = 0;
1244                }
1245                tx_desc->read.olinfo_status = 0;
1246
1247                size = skb_frag_size(frag);
1248                data_len -= size;
1249
1250                dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
1251                                       size, DMA_TO_DEVICE);
1252
1253                tx_buffer = &tx_ring->tx_buffer_info[i];
1254        }
1255
1256        /* write last descriptor with RS and EOP bits */
1257        cmd_type |= size | IGC_TXD_DCMD;
1258        tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
1259
1260        netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
1261
1262        /* set the timestamp */
1263        first->time_stamp = jiffies;
1264
1265        skb_tx_timestamp(skb);
1266
1267        /* Force memory writes to complete before letting h/w know there
1268         * are new descriptors to fetch.  (Only applicable for weak-ordered
1269         * memory model archs, such as IA-64).
1270         *
1271         * We also need this memory barrier to make certain all of the
1272         * status bits have been updated before next_to_watch is written.
1273         */
1274        wmb();
1275
1276        /* set next_to_watch value indicating a packet is present */
1277        first->next_to_watch = tx_desc;
1278
1279        i++;
1280        if (i == tx_ring->count)
1281                i = 0;
1282
1283        tx_ring->next_to_use = i;
1284
1285        /* Make sure there is space in the ring for the next send. */
1286        igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
1287
1288        if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
1289                writel(i, tx_ring->tail);
1290        }
1291
1292        return 0;
1293dma_error:
1294        netdev_err(tx_ring->netdev, "TX DMA map failed\n");
1295        tx_buffer = &tx_ring->tx_buffer_info[i];
1296
1297        /* clear dma mappings for failed tx_buffer_info map */
1298        while (tx_buffer != first) {
1299                if (dma_unmap_len(tx_buffer, len))
1300                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1301
1302                if (i-- == 0)
1303                        i += tx_ring->count;
1304                tx_buffer = &tx_ring->tx_buffer_info[i];
1305        }
1306
1307        if (dma_unmap_len(tx_buffer, len))
1308                igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1309
1310        dev_kfree_skb_any(tx_buffer->skb);
1311        tx_buffer->skb = NULL;
1312
1313        tx_ring->next_to_use = i;
1314
1315        return -1;
1316}
1317
1318static int igc_tso(struct igc_ring *tx_ring,
1319                   struct igc_tx_buffer *first,
1320                   u8 *hdr_len)
1321{
1322        u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
1323        struct sk_buff *skb = first->skb;
1324        union {
1325                struct iphdr *v4;
1326                struct ipv6hdr *v6;
1327                unsigned char *hdr;
1328        } ip;
1329        union {
1330                struct tcphdr *tcp;
1331                struct udphdr *udp;
1332                unsigned char *hdr;
1333        } l4;
1334        u32 paylen, l4_offset;
1335        int err;
1336
1337        if (skb->ip_summed != CHECKSUM_PARTIAL)
1338                return 0;
1339
1340        if (!skb_is_gso(skb))
1341                return 0;
1342
1343        err = skb_cow_head(skb, 0);
1344        if (err < 0)
1345                return err;
1346
1347        ip.hdr = skb_network_header(skb);
1348        l4.hdr = skb_checksum_start(skb);
1349
1350        /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
1351        type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1352
1353        /* initialize outer IP header fields */
1354        if (ip.v4->version == 4) {
1355                unsigned char *csum_start = skb_checksum_start(skb);
1356                unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
1357
1358                /* IP header will have to cancel out any data that
1359                 * is not a part of the outer IP header
1360                 */
1361                ip.v4->check = csum_fold(csum_partial(trans_start,
1362                                                      csum_start - trans_start,
1363                                                      0));
1364                type_tucmd |= IGC_ADVTXD_TUCMD_IPV4;
1365
1366                ip.v4->tot_len = 0;
1367                first->tx_flags |= IGC_TX_FLAGS_TSO |
1368                                   IGC_TX_FLAGS_CSUM |
1369                                   IGC_TX_FLAGS_IPV4;
1370        } else {
1371                ip.v6->payload_len = 0;
1372                first->tx_flags |= IGC_TX_FLAGS_TSO |
1373                                   IGC_TX_FLAGS_CSUM;
1374        }
1375
1376        /* determine offset of inner transport header */
1377        l4_offset = l4.hdr - skb->data;
1378
1379        /* remove payload length from inner checksum */
1380        paylen = skb->len - l4_offset;
1381        if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) {
1382                /* compute length of segmentation header */
1383                *hdr_len = (l4.tcp->doff * 4) + l4_offset;
1384                csum_replace_by_diff(&l4.tcp->check,
1385                                     (__force __wsum)htonl(paylen));
1386        } else {
1387                /* compute length of segmentation header */
1388                *hdr_len = sizeof(*l4.udp) + l4_offset;
1389                csum_replace_by_diff(&l4.udp->check,
1390                                     (__force __wsum)htonl(paylen));
1391        }
1392
1393        /* update gso size and bytecount with header size */
1394        first->gso_segs = skb_shinfo(skb)->gso_segs;
1395        first->bytecount += (first->gso_segs - 1) * *hdr_len;
1396
1397        /* MSS L4LEN IDX */
1398        mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT;
1399        mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT;
1400
1401        /* VLAN MACLEN IPLEN */
1402        vlan_macip_lens = l4.hdr - ip.hdr;
1403        vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
1404        vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1405
1406        igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
1407                        type_tucmd, mss_l4len_idx);
1408
1409        return 1;
1410}
1411
1412static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
1413                                       struct igc_ring *tx_ring)
1414{
1415        u16 count = TXD_USE_COUNT(skb_headlen(skb));
1416        __be16 protocol = vlan_get_protocol(skb);
1417        struct igc_tx_buffer *first;
1418        u32 tx_flags = 0;
1419        unsigned short f;
1420        u8 hdr_len = 0;
1421        int tso = 0;
1422
1423        /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
1424         *      + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
1425         *      + 2 desc gap to keep tail from touching head,
1426         *      + 1 desc for context descriptor,
1427         * otherwise try next time
1428         */
1429        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
1430                count += TXD_USE_COUNT(skb_frag_size(
1431                                                &skb_shinfo(skb)->frags[f]));
1432
1433        if (igc_maybe_stop_tx(tx_ring, count + 3)) {
1434                /* this is a hard error */
1435                return NETDEV_TX_BUSY;
1436        }
1437
1438        /* record the location of the first descriptor for this packet */
1439        first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
1440        first->type = IGC_TX_BUFFER_TYPE_SKB;
1441        first->skb = skb;
1442        first->bytecount = skb->len;
1443        first->gso_segs = 1;
1444
1445        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
1446                struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1447
1448                /* FIXME: add support for retrieving timestamps from
1449                 * the other timer registers before skipping the
1450                 * timestamping request.
1451                 */
1452                if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
1453                    !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
1454                                           &adapter->state)) {
1455                        skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1456                        tx_flags |= IGC_TX_FLAGS_TSTAMP;
1457
1458                        adapter->ptp_tx_skb = skb_get(skb);
1459                        adapter->ptp_tx_start = jiffies;
1460                } else {
1461                        adapter->tx_hwtstamp_skipped++;
1462                }
1463        }
1464
1465        if (skb_vlan_tag_present(skb)) {
1466                tx_flags |= IGC_TX_FLAGS_VLAN;
1467                tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT);
1468        }
1469
1470        /* record initial flags and protocol */
1471        first->tx_flags = tx_flags;
1472        first->protocol = protocol;
1473
1474        tso = igc_tso(tx_ring, first, &hdr_len);
1475        if (tso < 0)
1476                goto out_drop;
1477        else if (!tso)
1478                igc_tx_csum(tx_ring, first);
1479
1480        igc_tx_map(tx_ring, first, hdr_len);
1481
1482        return NETDEV_TX_OK;
1483
1484out_drop:
1485        dev_kfree_skb_any(first->skb);
1486        first->skb = NULL;
1487
1488        return NETDEV_TX_OK;
1489}
1490
1491static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
1492                                                    struct sk_buff *skb)
1493{
1494        unsigned int r_idx = skb->queue_mapping;
1495
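            /* The stack may hand us a queue index beyond the Tx rings this
             * adapter allocated; fold it back into the valid range.
             */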
1496        if (r_idx >= adapter->num_tx_queues)
1497                r_idx = r_idx % adapter->num_tx_queues;
1498
1499        return adapter->tx_ring[r_idx];
1500}
1501
1502static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
1503                                  struct net_device *netdev)
1504{
1505        struct igc_adapter *adapter = netdev_priv(netdev);
1506
1507        /* The minimum packet size with TCTL.PSP set is 17 so pad the skb
1508         * in order to meet this minimum size requirement.
1509         */
1510        if (skb->len < 17) {
1511                if (skb_padto(skb, 17))
1512                        return NETDEV_TX_OK;
1513                skb->len = 17;
1514        }
1515
1516        return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
1517}
1518
1519static void igc_rx_checksum(struct igc_ring *ring,
1520                            union igc_adv_rx_desc *rx_desc,
1521                            struct sk_buff *skb)
1522{
1523        skb_checksum_none_assert(skb);
1524
1525        /* Ignore Checksum bit is set */
1526        if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
1527                return;
1528
1529        /* Rx checksum disabled via ethtool */
1530        if (!(ring->netdev->features & NETIF_F_RXCSUM))
1531                return;
1532
1533        /* TCP/UDP checksum error bit is set */
1534        if (igc_test_staterr(rx_desc,
1535                             IGC_RXDEXT_STATERR_L4E |
1536                             IGC_RXDEXT_STATERR_IPE)) {
1537                /* work around errata with sctp packets where the TCPE aka
1538                 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
1539                 * packets (aka let the stack check the crc32c)
1540                 */
1541                if (!(skb->len == 60 &&
1542                      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
1543                        u64_stats_update_begin(&ring->rx_syncp);
1544                        ring->rx_stats.csum_err++;
1545                        u64_stats_update_end(&ring->rx_syncp);
1546                }
1547                /* let the stack verify checksum errors */
1548                return;
1549        }
1550        /* It must be a TCP or UDP packet with a valid checksum */
1551        if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
1552                                      IGC_RXD_STAT_UDPCS))
1553                skb->ip_summed = CHECKSUM_UNNECESSARY;
1554
1555        netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
1556                   le32_to_cpu(rx_desc->wb.upper.status_error));
1557}
1558
1559static inline void igc_rx_hash(struct igc_ring *ring,
1560                               union igc_adv_rx_desc *rx_desc,
1561                               struct sk_buff *skb)
1562{
1563        if (ring->netdev->features & NETIF_F_RXHASH)
1564                skb_set_hash(skb,
1565                             le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
1566                             PKT_HASH_TYPE_L3);
1567}
1568
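/* If VLAN stripping is enabled and the descriptor carries a VLAN tag,
 * extract it (byte-swapping for loopback packets when required) and
 * attach it to the skb.
 */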
1569static void igc_rx_vlan(struct igc_ring *rx_ring,
1570                        union igc_adv_rx_desc *rx_desc,
1571                        struct sk_buff *skb)
1572{
1573        struct net_device *dev = rx_ring->netdev;
1574        u16 vid;
1575
1576        if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
1577            igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
1578                if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
1579                    test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
1580                        vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
1581                else
1582                        vid = le16_to_cpu(rx_desc->wb.upper.vlan);
1583
1584                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
1585        }
1586}
1587
1588/**
1589 * igc_process_skb_fields - Populate skb header fields from Rx descriptor
1590 * @rx_ring: rx descriptor ring packet is being transacted on
1591 * @rx_desc: pointer to the EOP Rx descriptor
1592 * @skb: pointer to current skb being populated
1593 *
1594 * This function checks the ring, descriptor, and packet information in order
1595 * to populate the hash, checksum, VLAN, protocol, and other fields within the
1596 * skb.
1597 */
1598static void igc_process_skb_fields(struct igc_ring *rx_ring,
1599                                   union igc_adv_rx_desc *rx_desc,
1600                                   struct sk_buff *skb)
1601{
1602        igc_rx_hash(rx_ring, rx_desc, skb);
1603
1604        igc_rx_checksum(rx_ring, rx_desc, skb);
1605
1606        igc_rx_vlan(rx_ring, rx_desc, skb);
1607
1608        skb_record_rx_queue(skb, rx_ring->queue_index);
1609
1610        skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1611}
1612
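/* Enable or disable hardware VLAN tag insertion/stripping by toggling the
 * CTRL.VME bit according to the requested netdev features.
 */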
1613static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features)
1614{
1615        bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
1616        struct igc_adapter *adapter = netdev_priv(netdev);
1617        struct igc_hw *hw = &adapter->hw;
1618        u32 ctrl;
1619
1620        ctrl = rd32(IGC_CTRL);
1621
1622        if (enable) {
1623                /* enable VLAN tag insert/strip */
1624                ctrl |= IGC_CTRL_VME;
1625        } else {
1626                /* disable VLAN tag insert/strip */
1627                ctrl &= ~IGC_CTRL_VME;
1628        }
1629        wr32(IGC_CTRL, ctrl);
1630}
1631
1632static void igc_restore_vlan(struct igc_adapter *adapter)
1633{
1634        igc_vlan_mode(adapter->netdev, adapter->netdev->features);
1635}
1636
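/* Return the next Rx buffer to be cleaned: record its page refcount,
 * prefetch the page, sync the buffer for CPU access and consume one
 * pagecnt_bias reference.
 */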
1637static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
1638                                               const unsigned int size,
1639                                               int *rx_buffer_pgcnt)
1640{
1641        struct igc_rx_buffer *rx_buffer;
1642
1643        rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
1644        *rx_buffer_pgcnt =
1645#if (PAGE_SIZE < 8192)
1646                page_count(rx_buffer->page);
1647#else
1648                0;
1649#endif
1650        prefetchw(rx_buffer->page);
1651
1652        /* we are reusing so sync this buffer for CPU use */
1653        dma_sync_single_range_for_cpu(rx_ring->dev,
1654                                      rx_buffer->dma,
1655                                      rx_buffer->page_offset,
1656                                      size,
1657                                      DMA_FROM_DEVICE);
1658
1659        rx_buffer->pagecnt_bias--;
1660
1661        return rx_buffer;
1662}
1663
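/* Advance the buffer's page offset (flipping between page halves on
 * small-page systems) so a different region of the page backs the next frame.
 */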
1664static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
1665                               unsigned int truesize)
1666{
1667#if (PAGE_SIZE < 8192)
1668        buffer->page_offset ^= truesize;
1669#else
1670        buffer->page_offset += truesize;
1671#endif
1672}
1673
1674static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring,
1675                                              unsigned int size)
1676{
1677        unsigned int truesize;
1678
1679#if (PAGE_SIZE < 8192)
1680        truesize = igc_rx_pg_size(ring) / 2;
1681#else
1682        truesize = ring_uses_build_skb(ring) ?
1683                   SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
1684                   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1685                   SKB_DATA_ALIGN(size);
1686#endif
1687        return truesize;
1688}
1689
1690/**
1691 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
1692 * @rx_ring: rx descriptor ring to transact packets on
1693 * @rx_buffer: buffer containing page to add
1694 * @skb: sk_buff to place the data into
1695 * @size: size of buffer to be added
1696 *
1697 * This function will add the data contained in rx_buffer->page to the skb.
1698 */
1699static void igc_add_rx_frag(struct igc_ring *rx_ring,
1700                            struct igc_rx_buffer *rx_buffer,
1701                            struct sk_buff *skb,
1702                            unsigned int size)
1703{
1704        unsigned int truesize;
1705
1706#if (PAGE_SIZE < 8192)
1707        truesize = igc_rx_pg_size(rx_ring) / 2;
1708#else
1709        truesize = ring_uses_build_skb(rx_ring) ?
1710                   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1711                   SKB_DATA_ALIGN(size);
1712#endif
1713        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
1714                        rx_buffer->page_offset, size, truesize);
1715
1716        igc_rx_buffer_flip(rx_buffer, truesize);
1717}
1718
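/* Build an skb directly around the existing page buffer described by the
 * XDP buff, avoiding a copy of the packet data, then flip the buffer so the
 * page can be reused.
 */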
1719static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
1720                                     struct igc_rx_buffer *rx_buffer,
1721                                     struct xdp_buff *xdp)
1722{
1723        unsigned int size = xdp->data_end - xdp->data;
1724        unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1725        unsigned int metasize = xdp->data - xdp->data_meta;
1726        struct sk_buff *skb;
1727
1728        /* prefetch first cache line of first page */
1729        net_prefetch(xdp->data_meta);
1730
1731        /* build an skb around the page buffer */
1732        skb = napi_build_skb(xdp->data_hard_start, truesize);
1733        if (unlikely(!skb))
1734                return NULL;
1735
1736        /* update pointers within the skb to store the data */
1737        skb_reserve(skb, xdp->data - xdp->data_hard_start);
1738        __skb_put(skb, size);
1739        if (metasize)
1740                skb_metadata_set(skb, metasize);
1741
1742        igc_rx_buffer_flip(rx_buffer, truesize);
1743        return skb;
1744}
1745
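/* Allocate a new skb, copy the packet headers (and any XDP metadata) into
 * its linear area, and attach any remaining data as a page fragment.
 */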
1746static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
1747                                         struct igc_rx_buffer *rx_buffer,
1748                                         struct xdp_buff *xdp,
1749                                         ktime_t timestamp)
1750{
1751        unsigned int metasize = xdp->data - xdp->data_meta;
1752        unsigned int size = xdp->data_end - xdp->data;
1753        unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1754        void *va = xdp->data;
1755        unsigned int headlen;
1756        struct sk_buff *skb;
1757
1758        /* prefetch first cache line of first page */
1759        net_prefetch(xdp->data_meta);
1760
1761        /* allocate a skb to store the frags */
1762        skb = napi_alloc_skb(&rx_ring->q_vector->napi,
1763                             IGC_RX_HDR_LEN + metasize);
1764        if (unlikely(!skb))
1765                return NULL;
1766
1767        if (timestamp)
1768                skb_hwtstamps(skb)->hwtstamp = timestamp;
1769
1770        /* Determine how much of the packet to copy into the skb linear area */
1771        headlen = size;
1772        if (headlen > IGC_RX_HDR_LEN)
1773                headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
1774
1775        /* align pull length to size of long to optimize memcpy performance */
1776        memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
1777               ALIGN(headlen + metasize, sizeof(long)));
1778
1779        if (metasize) {
1780                skb_metadata_set(skb, metasize);
1781                __skb_pull(skb, metasize);
1782        }
1783
1784        /* update all of the pointers */
1785        size -= headlen;
1786        if (size) {
1787                skb_add_rx_frag(skb, 0, rx_buffer->page,
1788                                (va + headlen) - page_address(rx_buffer->page),
1789                                size, truesize);
1790                igc_rx_buffer_flip(rx_buffer, truesize);
1791        } else {
1792                rx_buffer->pagecnt_bias++;
1793        }
1794
1795        return skb;
1796}
1797
1798/**
1799 * igc_reuse_rx_page - page flip buffer and store it back on the ring
1800 * @rx_ring: rx descriptor ring to store buffers on
1801 * @old_buff: donor buffer to have page reused
1802 *
1803 * Synchronizes page for reuse by the adapter
1804 */
1805static void igc_reuse_rx_page(struct igc_ring *rx_ring,
1806                              struct igc_rx_buffer *old_buff)
1807{
1808        u16 nta = rx_ring->next_to_alloc;
1809        struct igc_rx_buffer *new_buff;
1810
1811        new_buff = &rx_ring->rx_buffer_info[nta];
1812
1813        /* update, and store next to alloc */
1814        nta++;
1815        rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
1816
1817        /* Transfer page from old buffer to new buffer.
1818         * Move each member individually to avoid possible store
1819         * forwarding stalls.
1820         */
1821        new_buff->dma           = old_buff->dma;
1822        new_buff->page          = old_buff->page;
1823        new_buff->page_offset   = old_buff->page_offset;
1824        new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
1825}
1826
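/* Decide whether an Rx page can be recycled: it must be reusable (local,
 * not pfmemalloc) and either exclusively owned by the driver (small pages)
 * or still have room left (large pages); refresh pagecnt_bias when it runs low.
 */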
1827static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer,
1828                                  int rx_buffer_pgcnt)
1829{
1830        unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
1831        struct page *page = rx_buffer->page;
1832
1833        /* avoid re-using remote and pfmemalloc pages */
1834        if (!dev_page_is_reusable(page))
1835                return false;
1836
1837#if (PAGE_SIZE < 8192)
1838        /* if we are the only owner of the page we can reuse it */
1839        if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
1840                return false;
1841#else
1842#define IGC_LAST_OFFSET \
1843        (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
1844
1845        if (rx_buffer->page_offset > IGC_LAST_OFFSET)
1846                return false;
1847#endif
1848
1849        /* If we have drained the page fragment pool we need to update
1850         * the pagecnt_bias and page count so that we fully restock the
1851         * number of references the driver holds.
1852         */
1853        if (unlikely(pagecnt_bias == 1)) {
1854                page_ref_add(page, USHRT_MAX - 1);
1855                rx_buffer->pagecnt_bias = USHRT_MAX;
1856        }
1857
1858        return true;
1859}
1860
1861/**
1862 * igc_is_non_eop - process handling of non-EOP buffers
1863 * @rx_ring: Rx ring being processed
1864 * @rx_desc: Rx descriptor for current buffer
1865 *
1866 * This function updates next to clean.  If the buffer is an EOP buffer
1867 * this function exits returning false, otherwise it will place the
1868 * sk_buff in the next buffer to be chained and return true indicating
1869 * that this is in fact a non-EOP buffer.
1870 */
1871static bool igc_is_non_eop(struct igc_ring *rx_ring,
1872                           union igc_adv_rx_desc *rx_desc)
1873{
1874        u32 ntc = rx_ring->next_to_clean + 1;
1875
1876        /* fetch, update, and store next to clean */
1877        ntc = (ntc < rx_ring->count) ? ntc : 0;
1878        rx_ring->next_to_clean = ntc;
1879
1880        prefetch(IGC_RX_DESC(rx_ring, ntc));
1881
1882        if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
1883                return false;
1884
1885        return true;
1886}
1887
1888/**
1889 * igc_cleanup_headers - Correct corrupted or empty headers
1890 * @rx_ring: rx descriptor ring packet is being transacted on
1891 * @rx_desc: pointer to the EOP Rx descriptor
1892 * @skb: pointer to current skb being fixed
1893 *
1894 * Address the case where we are pulling data in on pages only
1895 * and as such no data is present in the skb header.
1896 *
1897 * In addition if skb is not at least 60 bytes we need to pad it so that
1898 * it is large enough to qualify as a valid Ethernet frame.
1899 *
1900 * Returns true if an error was encountered and skb was freed.
1901 */
1902static bool igc_cleanup_headers(struct igc_ring *rx_ring,
1903                                union igc_adv_rx_desc *rx_desc,
1904                                struct sk_buff *skb)
1905{
1906        /* XDP packets use error pointer so abort at this point */
1907        if (IS_ERR(skb))
1908                return true;
1909
1910        if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
1911                struct net_device *netdev = rx_ring->netdev;
1912
1913                if (!(netdev->features & NETIF_F_RXALL)) {
1914                        dev_kfree_skb_any(skb);
1915                        return true;
1916                }
1917        }
1918
1919        /* if eth_skb_pad returns an error the skb was freed */
1920        if (eth_skb_pad(skb))
1921                return true;
1922
1923        return false;
1924}
1925
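/* Recycle the Rx page back onto the ring if possible; otherwise unmap it
 * and drop the references held on it.
 */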
1926static void igc_put_rx_buffer(struct igc_ring *rx_ring,
1927                              struct igc_rx_buffer *rx_buffer,
1928                              int rx_buffer_pgcnt)
1929{
1930        if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
1931                /* hand second half of page back to the ring */
1932                igc_reuse_rx_page(rx_ring, rx_buffer);
1933        } else {
1934                /* We are not reusing the buffer so unmap it and free
1935                 * any references we are holding to it
1936                 */
1937                dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
1938                                     igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
1939                                     IGC_RX_DMA_ATTR);
1940                __page_frag_cache_drain(rx_buffer->page,
1941                                        rx_buffer->pagecnt_bias);
1942        }
1943
1944        /* clear contents of rx_buffer */
1945        rx_buffer->page = NULL;
1946}
1947
1948static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
1949{
1950        struct igc_adapter *adapter = rx_ring->q_vector->adapter;
1951
1952        if (ring_uses_build_skb(rx_ring))
1953                return IGC_SKB_PAD;
1954        if (igc_xdp_is_enabled(adapter))
1955                return XDP_PACKET_HEADROOM;
1956
1957        return 0;
1958}
1959
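/* Ensure the Rx buffer has a DMA-mapped page, allocating and mapping a
 * fresh one when necessary.
 */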
1960static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
1961                                  struct igc_rx_buffer *bi)
1962{
1963        struct page *page = bi->page;
1964        dma_addr_t dma;
1965
1966        /* since we are recycling buffers we should seldom need to alloc */
1967        if (likely(page))
1968                return true;
1969
1970        /* alloc new page for storage */
1971        page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
1972        if (unlikely(!page)) {
1973                rx_ring->rx_stats.alloc_failed++;
1974                return false;
1975        }
1976
1977        /* map page for use */
1978        dma = dma_map_page_attrs(rx_ring->dev, page, 0,
1979                                 igc_rx_pg_size(rx_ring),
1980                                 DMA_FROM_DEVICE,
1981                                 IGC_RX_DMA_ATTR);
1982
1983        /* if mapping failed free memory back to system since
1984         * there isn't much point in holding memory we can't use
1985         */
1986        if (dma_mapping_error(rx_ring->dev, dma)) {
1987                __free_page(page);
1988
1989                rx_ring->rx_stats.alloc_failed++;
1990                return false;
1991        }
1992
1993        bi->dma = dma;
1994        bi->page = page;
1995        bi->page_offset = igc_rx_offset(rx_ring);
1996        page_ref_add(page, USHRT_MAX - 1);
1997        bi->pagecnt_bias = USHRT_MAX;
1998
1999        return true;
2000}
2001
2002/**
2003 * igc_alloc_rx_buffers - Replace used receive buffers; packet split
2004 * @rx_ring: rx descriptor ring
2005 * @cleaned_count: number of buffers to clean
2006 */
2007static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
2008{
2009        union igc_adv_rx_desc *rx_desc;
2010        u16 i = rx_ring->next_to_use;
2011        struct igc_rx_buffer *bi;
2012        u16 bufsz;
2013
2014        /* nothing to do */
2015        if (!cleaned_count)
2016                return;
2017
2018        rx_desc = IGC_RX_DESC(rx_ring, i);
2019        bi = &rx_ring->rx_buffer_info[i];
2020        i -= rx_ring->count;
2021
2022        bufsz = igc_rx_bufsz(rx_ring);
2023
2024        do {
2025                if (!igc_alloc_mapped_page(rx_ring, bi))
2026                        break;
2027
2028                /* sync the buffer for use by the device */
2029                dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
2030                                                 bi->page_offset, bufsz,
2031                                                 DMA_FROM_DEVICE);
2032
2033                /* Refresh the desc even if buffer_addrs didn't change
2034                 * because each write-back erases this info.
2035                 */
2036                rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
2037
2038                rx_desc++;
2039                bi++;
2040                i++;
2041                if (unlikely(!i)) {
2042                        rx_desc = IGC_RX_DESC(rx_ring, 0);
2043                        bi = rx_ring->rx_buffer_info;
2044                        i -= rx_ring->count;
2045                }
2046
2047                /* clear the length for the next_to_use descriptor */
2048                rx_desc->wb.upper.length = 0;
2049
2050                cleaned_count--;
2051        } while (cleaned_count);
2052
2053        i += rx_ring->count;
2054
2055        if (rx_ring->next_to_use != i) {
2056                /* record the next descriptor to use */
2057                rx_ring->next_to_use = i;
2058
2059                /* update next to alloc since we have filled the ring */
2060                rx_ring->next_to_alloc = i;
2061
2062                /* Force memory writes to complete before letting h/w
2063                 * know there are new descriptors to fetch.  (Only
2064                 * applicable for weak-ordered memory model archs,
2065                 * such as IA-64).
2066                 */
2067                wmb();
2068                writel(i, rx_ring->tail);
2069        }
2070}
2071
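/* AF_XDP zero-copy counterpart of igc_alloc_rx_buffers(): pull buffers from
 * the XSK pool, post their DMA addresses to the Rx descriptors and bump the
 * tail pointer; returns false if the pool runs out of buffers.
 */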
2072static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
2073{
2074        union igc_adv_rx_desc *desc;
2075        u16 i = ring->next_to_use;
2076        struct igc_rx_buffer *bi;
2077        dma_addr_t dma;
2078        bool ok = true;
2079
2080        if (!count)
2081                return ok;
2082
2083        desc = IGC_RX_DESC(ring, i);
2084        bi = &ring->rx_buffer_info[i];
2085        i -= ring->count;
2086
2087        do {
2088                bi->xdp = xsk_buff_alloc(ring->xsk_pool);
2089                if (!bi->xdp) {
2090                        ok = false;
2091                        break;
2092                }
2093
2094                dma = xsk_buff_xdp_get_dma(bi->xdp);
2095                desc->read.pkt_addr = cpu_to_le64(dma);
2096
2097                desc++;
2098                bi++;
2099                i++;
2100                if (unlikely(!i)) {
2101                        desc = IGC_RX_DESC(ring, 0);
2102                        bi = ring->rx_buffer_info;
2103                        i -= ring->count;
2104                }
2105
2106                /* Clear the length for the next_to_use descriptor. */
2107                desc->wb.upper.length = 0;
2108
2109                count--;
2110        } while (count);
2111
2112        i += ring->count;
2113
2114        if (ring->next_to_use != i) {
2115                ring->next_to_use = i;
2116
2117                /* Force memory writes to complete before letting h/w
2118                 * know there are new descriptors to fetch.  (Only
2119                 * applicable for weak-ordered memory model archs,
2120                 * such as IA-64).
2121                 */
2122                wmb();
2123                writel(i, ring->tail);
2124        }
2125
2126        return ok;
2127}
2128
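/* DMA-map an XDP frame and initialize the Tx buffer bookkeeping used later
 * by the Tx cleanup path.
 */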
2129static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
2130                                  struct xdp_frame *xdpf,
2131                                  struct igc_ring *ring)
2132{
2133        dma_addr_t dma;
2134
2135        dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
2136        if (dma_mapping_error(ring->dev, dma)) {
2137                netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
2138                return -ENOMEM;
2139        }
2140
2141        buffer->type = IGC_TX_BUFFER_TYPE_XDP;
2142        buffer->xdpf = xdpf;
2143        buffer->protocol = 0;
2144        buffer->bytecount = xdpf->len;
2145        buffer->gso_segs = 1;
2146        buffer->time_stamp = jiffies;
2147        dma_unmap_len_set(buffer, len, xdpf->len);
2148        dma_unmap_addr_set(buffer, dma, dma);
2149        return 0;
2150}
2151
2152/* This function requires __netif_tx_lock is held by the caller. */
2153static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
2154                                      struct xdp_frame *xdpf)
2155{
2156        struct igc_tx_buffer *buffer;
2157        union igc_adv_tx_desc *desc;
2158        u32 cmd_type, olinfo_status;
2159        int err;
2160
2161        if (!igc_desc_unused(ring))
2162                return -EBUSY;
2163
2164        buffer = &ring->tx_buffer_info[ring->next_to_use];
2165        err = igc_xdp_init_tx_buffer(buffer, xdpf, ring);
2166        if (err)
2167                return err;
2168
2169        cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2170                   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2171                   buffer->bytecount;
2172        olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
2173
2174        desc = IGC_TX_DESC(ring, ring->next_to_use);
2175        desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2176        desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2177        desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
2178
2179        netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount);
2180
2181        buffer->next_to_watch = desc;
2182
2183        ring->next_to_use++;
2184        if (ring->next_to_use == ring->count)
2185                ring->next_to_use = 0;
2186
2187        return 0;
2188}
2189
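/* Pick the Tx ring used for XDP transmission on the given CPU, wrapping
 * around the number of Tx queues.
 */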
2190static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
2191                                            int cpu)
2192{
2193        int index = cpu;
2194
2195        if (unlikely(index < 0))
2196                index = 0;
2197
2198        while (index >= adapter->num_tx_queues)
2199                index -= adapter->num_tx_queues;
2200
2201        return adapter->tx_ring[index];
2202}
2203
2204static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
2205{
2206        struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
2207        int cpu = smp_processor_id();
2208        struct netdev_queue *nq;
2209        struct igc_ring *ring;
2210        int res;
2211
2212        if (unlikely(!xdpf))
2213                return -EFAULT;
2214
2215        ring = igc_xdp_get_tx_ring(adapter, cpu);
2216        nq = txring_txq(ring);
2217
2218        __netif_tx_lock(nq, cpu);
2219        res = igc_xdp_init_tx_descriptor(ring, xdpf);
2220        __netif_tx_unlock(nq);
2221        return res;
2222}
2223
2224/* This function assumes rcu_read_lock() is held by the caller. */
2225static int __igc_xdp_run_prog(struct igc_adapter *adapter,
2226                              struct bpf_prog *prog,
2227                              struct xdp_buff *xdp)
2228{
2229        u32 act = bpf_prog_run_xdp(prog, xdp);
2230
2231        switch (act) {
2232        case XDP_PASS:
2233                return IGC_XDP_PASS;
2234        case XDP_TX:
2235                if (igc_xdp_xmit_back(adapter, xdp) < 0)
2236                        goto out_failure;
2237                return IGC_XDP_TX;
2238        case XDP_REDIRECT:
2239                if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
2240                        goto out_failure;
2241                return IGC_XDP_REDIRECT;
2243        default:
2244                bpf_warn_invalid_xdp_action(adapter->netdev, prog, act);
2245                fallthrough;
2246        case XDP_ABORTED:
2247out_failure:
2248                trace_xdp_exception(adapter->netdev, prog, act);
2249                fallthrough;
2250        case XDP_DROP:
2251                return IGC_XDP_CONSUMED;
2252        }
2253}
2254
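/* Run the attached XDP program (if any) on the received buffer and return
 * the verdict encoded as an ERR_PTR for the Rx path to decode.
 */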
2255static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
2256                                        struct xdp_buff *xdp)
2257{
2258        struct bpf_prog *prog;
2259        int res;
2260
2261        prog = READ_ONCE(adapter->xdp_prog);
2262        if (!prog) {
2263                res = IGC_XDP_PASS;
2264                goto out;
2265        }
2266
2267        res = __igc_xdp_run_prog(adapter, prog, xdp);
2268
2269out:
2270        return ERR_PTR(-res);
2271}
2272
2273/* This function assumes __netif_tx_lock is held by the caller. */
2274static void igc_flush_tx_descriptors(struct igc_ring *ring)
2275{
2276        /* Hardware can fetch the descriptors at any time once the tail pointer
2277         * is updated, so issue a write memory barrier here to ensure all memory
2278         * writes are complete before the tail pointer is written.
2279         */
2280        wmb();
2281        writel(ring->next_to_use, ring->tail);
2282}
2283
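/* After processing a batch of frames, update the Tx tail for any XDP_TX
 * frames and flush pending XDP redirects.
 */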
2284static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
2285{
2286        int cpu = smp_processor_id();
2287        struct netdev_queue *nq;
2288        struct igc_ring *ring;
2289
2290        if (status & IGC_XDP_TX) {
2291                ring = igc_xdp_get_tx_ring(adapter, cpu);
2292                nq = txring_txq(ring);
2293
2294                __netif_tx_lock(nq, cpu);
2295                igc_flush_tx_descriptors(ring);
2296                __netif_tx_unlock(nq);
2297        }
2298
2299        if (status & IGC_XDP_REDIRECT)
2300                xdp_do_flush();
2301}
2302
2303static void igc_update_rx_stats(struct igc_q_vector *q_vector,
2304                                unsigned int packets, unsigned int bytes)
2305{
2306        struct igc_ring *ring = q_vector->rx.ring;
2307
2308        u64_stats_update_begin(&ring->rx_syncp);
2309        ring->rx_stats.packets += packets;
2310        ring->rx_stats.bytes += bytes;
2311        u64_stats_update_end(&ring->rx_syncp);
2312
2313        q_vector->rx.total_packets += packets;
2314        q_vector->rx.total_bytes += bytes;
2315}
2316
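/* Main Rx poll loop: walk written-back descriptors up to the budget, run
 * XDP, construct or extend skbs, hand completed frames to GRO and replenish
 * Rx buffers.
 */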
2317static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
2318{
2319        unsigned int total_bytes = 0, total_packets = 0;
2320        struct igc_adapter *adapter = q_vector->adapter;
2321        struct igc_ring *rx_ring = q_vector->rx.ring;
2322        struct sk_buff *skb = rx_ring->skb;
2323        u16 cleaned_count = igc_desc_unused(rx_ring);
2324        int xdp_status = 0, rx_buffer_pgcnt;
2325
2326        while (likely(total_packets < budget)) {
2327                union igc_adv_rx_desc *rx_desc;
2328                struct igc_rx_buffer *rx_buffer;
2329                unsigned int size, truesize;
2330                ktime_t timestamp = 0;
2331                struct xdp_buff xdp;
2332                int pkt_offset = 0;
2333                void *pktbuf;
2334
2335                /* return some buffers to hardware; refilling one at a time is too slow */
2336                if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
2337                        igc_alloc_rx_buffers(rx_ring, cleaned_count);
2338                        cleaned_count = 0;
2339                }
2340
2341                rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
2342                size = le16_to_cpu(rx_desc->wb.upper.length);
2343                if (!size)
2344                        break;
2345
2346                /* This memory barrier is needed to keep us from reading
2347                 * any other fields out of the rx_desc until we know the
2348                 * descriptor has been written back
2349                 */
2350                dma_rmb();
2351
2352                rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
2353                truesize = igc_get_rx_frame_truesize(rx_ring, size);
2354
2355                pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
2356
2357                if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
2358                        timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2359                                                        pktbuf);
2360                        pkt_offset = IGC_TS_HDR_LEN;
2361                        size -= IGC_TS_HDR_LEN;
2362                }
2363
2364                if (!skb) {
2365                        xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
2366                        xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
2367                                         igc_rx_offset(rx_ring) + pkt_offset,
2368                                         size, true);
2369
2370                        skb = igc_xdp_run_prog(adapter, &xdp);
2371                }
2372
2373                if (IS_ERR(skb)) {
2374                        unsigned int xdp_res = -PTR_ERR(skb);
2375
2376                        switch (xdp_res) {
2377                        case IGC_XDP_CONSUMED:
2378                                rx_buffer->pagecnt_bias++;
2379                                break;
2380                        case IGC_XDP_TX:
2381                        case IGC_XDP_REDIRECT:
2382                                igc_rx_buffer_flip(rx_buffer, truesize);
2383                                xdp_status |= xdp_res;
2384                                break;
2385                        }
2386
2387                        total_packets++;
2388                        total_bytes += size;
2389                } else if (skb)
2390                        igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
2391                else if (ring_uses_build_skb(rx_ring))
2392                        skb = igc_build_skb(rx_ring, rx_buffer, &xdp);
2393                else
2394                        skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
2395                                                timestamp);
2396
2397                /* exit if we failed to retrieve a buffer */
2398                if (!skb) {
2399                        rx_ring->rx_stats.alloc_failed++;
2400                        rx_buffer->pagecnt_bias++;
2401                        break;
2402                }
2403
2404                igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
2405                cleaned_count++;
2406
2407                /* fetch next buffer in frame if non-eop */
2408                if (igc_is_non_eop(rx_ring, rx_desc))
2409                        continue;
2410
2411                /* verify the packet layout is correct */
2412                if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
2413                        skb = NULL;
2414                        continue;
2415                }
2416
2417                /* probably a little skewed due to removing CRC */
2418                total_bytes += skb->len;
2419
2420                /* populate checksum, VLAN, and protocol */
2421                igc_process_skb_fields(rx_ring, rx_desc, skb);
2422
2423                napi_gro_receive(&q_vector->napi, skb);
2424
2425                /* reset skb pointer */
2426                skb = NULL;
2427
2428                /* update budget accounting */
2429                total_packets++;
2430        }
2431
2432        if (xdp_status)
2433                igc_finalize_xdp(adapter, xdp_status);
2434
2435        /* place incomplete frames back on ring for completion */
2436        rx_ring->skb = skb;
2437
2438        igc_update_rx_stats(q_vector, total_packets, total_bytes);
2439
2440        if (cleaned_count)
2441                igc_alloc_rx_buffers(rx_ring, cleaned_count);
2442
2443        return total_packets;
2444}
2445
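/* Copy a zero-copy (XSK) buffer, including any XDP metadata, into a freshly
 * allocated skb so it can be passed up the stack.
 */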
2446static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
2447                                            struct xdp_buff *xdp)
2448{
2449        unsigned int metasize = xdp->data - xdp->data_meta;
2450        unsigned int datasize = xdp->data_end - xdp->data;
2451        unsigned int totalsize = metasize + datasize;
2452        struct sk_buff *skb;
2453
2454        skb = __napi_alloc_skb(&ring->q_vector->napi,
2455                               xdp->data_end - xdp->data_hard_start,
2456                               GFP_ATOMIC | __GFP_NOWARN);
2457        if (unlikely(!skb))
2458                return NULL;
2459
2460        skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
2461        memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize);
2462        if (metasize) {
2463                skb_metadata_set(skb, metasize);
2464                __skb_pull(skb, metasize);
2465        }
2466
2467        return skb;
2468}
2469
2470static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
2471                                union igc_adv_rx_desc *desc,
2472                                struct xdp_buff *xdp,
2473                                ktime_t timestamp)
2474{
2475        struct igc_ring *ring = q_vector->rx.ring;
2476        struct sk_buff *skb;
2477
2478        skb = igc_construct_skb_zc(ring, xdp);
2479        if (!skb) {
2480                ring->rx_stats.alloc_failed++;
2481                return;
2482        }
2483
2484        if (timestamp)
2485                skb_hwtstamps(skb)->hwtstamp = timestamp;
2486
2487        if (igc_cleanup_headers(ring, desc, skb))
2488                return;
2489
2490        igc_process_skb_fields(ring, desc, skb);
2491        napi_gro_receive(&q_vector->napi, skb);
2492}
2493
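/* AF_XDP zero-copy Rx poll loop: run the XDP program on each XSK buffer,
 * dispatch XDP_PASS frames to the stack, account XDP_TX/REDIRECT verdicts
 * and refill the ring from the XSK pool.
 */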
2494static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
2495{
2496        struct igc_adapter *adapter = q_vector->adapter;
2497        struct igc_ring *ring = q_vector->rx.ring;
2498        u16 cleaned_count = igc_desc_unused(ring);
2499        int total_bytes = 0, total_packets = 0;
2500        u16 ntc = ring->next_to_clean;
2501        struct bpf_prog *prog;
2502        bool failure = false;
2503        int xdp_status = 0;
2504
2505        rcu_read_lock();
2506
2507        prog = READ_ONCE(adapter->xdp_prog);
2508
2509        while (likely(total_packets < budget)) {
2510                union igc_adv_rx_desc *desc;
2511                struct igc_rx_buffer *bi;
2512                ktime_t timestamp = 0;
2513                unsigned int size;
2514                int res;
2515
2516                desc = IGC_RX_DESC(ring, ntc);
2517                size = le16_to_cpu(desc->wb.upper.length);
2518                if (!size)
2519                        break;
2520
2521                /* This memory barrier is needed to keep us from reading
2522                 * any other fields out of the rx_desc until we know the
2523                 * descriptor has been written back
2524                 */
2525                dma_rmb();
2526
2527                bi = &ring->rx_buffer_info[ntc];
2528
2529                if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
2530                        timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2531                                                        bi->xdp->data);
2532
2533                        bi->xdp->data += IGC_TS_HDR_LEN;
2534
2535                        /* The HW timestamp has been copied into a local variable, so the
2536                         * metadata length seen by the XDP program should be 0.
2537                         */
2538                        bi->xdp->data_meta += IGC_TS_HDR_LEN;
2539                        size -= IGC_TS_HDR_LEN;
2540                }
2541
2542                bi->xdp->data_end = bi->xdp->data + size;
2543                xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
2544
2545                res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
2546                switch (res) {
2547                case IGC_XDP_PASS:
2548                        igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
2549                        fallthrough;
2550                case IGC_XDP_CONSUMED:
2551                        xsk_buff_free(bi->xdp);
2552                        break;
2553                case IGC_XDP_TX:
2554                case IGC_XDP_REDIRECT:
2555                        xdp_status |= res;
2556                        break;
2557                }
2558
2559                bi->xdp = NULL;
2560                total_bytes += size;
2561                total_packets++;
2562                cleaned_count++;
2563                ntc++;
2564                if (ntc == ring->count)
2565                        ntc = 0;
2566        }
2567
2568        ring->next_to_clean = ntc;
2569        rcu_read_unlock();
2570
2571        if (cleaned_count >= IGC_RX_BUFFER_WRITE)
2572                failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count);
2573
2574        if (xdp_status)
2575                igc_finalize_xdp(adapter, xdp_status);
2576
2577        igc_update_rx_stats(q_vector, total_packets, total_bytes);
2578
2579        if (xsk_uses_need_wakeup(ring->xsk_pool)) {
2580                if (failure || ring->next_to_clean == ring->next_to_use)
2581                        xsk_set_rx_need_wakeup(ring->xsk_pool);
2582                else
2583                        xsk_clear_rx_need_wakeup(ring->xsk_pool);
2584                return total_packets;
2585        }
2586
2587        return failure ? budget : total_packets;
2588}
2589
2590static void igc_update_tx_stats(struct igc_q_vector *q_vector,
2591                                unsigned int packets, unsigned int bytes)
2592{
2593        struct igc_ring *ring = q_vector->tx.ring;
2594
2595        u64_stats_update_begin(&ring->tx_syncp);
2596        ring->tx_stats.bytes += bytes;
2597        ring->tx_stats.packets += packets;
2598        u64_stats_update_end(&ring->tx_syncp);
2599
2600        q_vector->tx.total_bytes += bytes;
2601        q_vector->tx.total_packets += packets;
2602}
2603
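/* Transmit frames queued in the AF_XDP pool: fill a hardware Tx descriptor
 * for each pending xdp_desc and kick the tail once done (only while the
 * carrier is up).
 */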
2604static void igc_xdp_xmit_zc(struct igc_ring *ring)
2605{
2606        struct xsk_buff_pool *pool = ring->xsk_pool;
2607        struct netdev_queue *nq = txring_txq(ring);
2608        union igc_adv_tx_desc *tx_desc = NULL;
2609        int cpu = smp_processor_id();
2610        u16 ntu = ring->next_to_use;
2611        struct xdp_desc xdp_desc;
2612        u16 budget;
2613
2614        if (!netif_carrier_ok(ring->netdev))
2615                return;
2616
2617        __netif_tx_lock(nq, cpu);
2618
2619        budget = igc_desc_unused(ring);
2620
2621        while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
2622                u32 cmd_type, olinfo_status;
2623                struct igc_tx_buffer *bi;
2624                dma_addr_t dma;
2625
2626                cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2627                           IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2628                           xdp_desc.len;
2629                olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT;
2630
2631                dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
2632                xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len);
2633
2634                tx_desc = IGC_TX_DESC(ring, ntu);
2635                tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2636                tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2637                tx_desc->read.buffer_addr = cpu_to_le64(dma);
2638
2639                bi = &ring->tx_buffer_info[ntu];
2640                bi->type = IGC_TX_BUFFER_TYPE_XSK;
2641                bi->protocol = 0;
2642                bi->bytecount = xdp_desc.len;
2643                bi->gso_segs = 1;
2644                bi->time_stamp = jiffies;
2645                bi->next_to_watch = tx_desc;
2646
2647                netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len);
2648
2649                ntu++;
2650                if (ntu == ring->count)
2651                        ntu = 0;
2652        }
2653
2654        ring->next_to_use = ntu;
2655        if (tx_desc) {
2656                igc_flush_tx_descriptors(ring);
2657                xsk_tx_release(pool);
2658        }
2659
2660        __netif_tx_unlock(nq);
2661}
2662
2663/**
2664 * igc_clean_tx_irq - Reclaim resources after transmit completes
2665 * @q_vector: pointer to q_vector containing needed info
2666 * @napi_budget: Used to determine if we are in netpoll
2667 *
2668 * returns true if ring is completely cleaned
2669 */
2670static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
2671{
2672        struct igc_adapter *adapter = q_vector->adapter;
2673        unsigned int total_bytes = 0, total_packets = 0;
2674        unsigned int budget = q_vector->tx.work_limit;
2675        struct igc_ring *tx_ring = q_vector->tx.ring;
2676        unsigned int i = tx_ring->next_to_clean;
2677        struct igc_tx_buffer *tx_buffer;
2678        union igc_adv_tx_desc *tx_desc;
2679        u32 xsk_frames = 0;
2680
2681        if (test_bit(__IGC_DOWN, &adapter->state))
2682                return true;
2683
2684        tx_buffer = &tx_ring->tx_buffer_info[i];
2685        tx_desc = IGC_TX_DESC(tx_ring, i);
2686        i -= tx_ring->count;
2687
2688        do {
2689                union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
2690
2691                /* if next_to_watch is not set then there is no work pending */
2692                if (!eop_desc)
2693                        break;
2694
2695                /* prevent any other reads prior to eop_desc */
2696                smp_rmb();
2697
2698                /* if DD is not set pending work has not been completed */
2699                if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
2700                        break;
2701
2702                /* clear next_to_watch to prevent false hangs */
2703                tx_buffer->next_to_watch = NULL;
2704
2705                /* update the statistics for this packet */
2706                total_bytes += tx_buffer->bytecount;
2707                total_packets += tx_buffer->gso_segs;
2708
2709                switch (tx_buffer->type) {
2710                case IGC_TX_BUFFER_TYPE_XSK:
2711                        xsk_frames++;
2712                        break;
2713                case IGC_TX_BUFFER_TYPE_XDP:
2714                        xdp_return_frame(tx_buffer->xdpf);
2715                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2716                        break;
2717                case IGC_TX_BUFFER_TYPE_SKB:
2718                        napi_consume_skb(tx_buffer->skb, napi_budget);
2719                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2720                        break;
2721                default:
2722                        netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
2723                        break;
2724                }
2725
2726                /* clear last DMA location and unmap remaining buffers */
2727                while (tx_desc != eop_desc) {
2728                        tx_buffer++;
2729                        tx_desc++;
2730                        i++;
2731                        if (unlikely(!i)) {
2732                                i -= tx_ring->count;
2733                                tx_buffer = tx_ring->tx_buffer_info;
2734                                tx_desc = IGC_TX_DESC(tx_ring, 0);
2735                        }
2736
2737                        /* unmap any remaining paged data */
2738                        if (dma_unmap_len(tx_buffer, len))
2739                                igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2740                }
2741
2742                /* move us one more past the eop_desc for start of next pkt */
2743                tx_buffer++;
2744                tx_desc++;
2745                i++;
2746                if (unlikely(!i)) {
2747                        i -= tx_ring->count;
2748                        tx_buffer = tx_ring->tx_buffer_info;
2749                        tx_desc = IGC_TX_DESC(tx_ring, 0);
2750                }
2751
2752                /* issue prefetch for next Tx descriptor */
2753                prefetch(tx_desc);
2754
2755                /* update budget accounting */
2756                budget--;
2757        } while (likely(budget));
2758
2759        netdev_tx_completed_queue(txring_txq(tx_ring),
2760                                  total_packets, total_bytes);
2761
2762        i += tx_ring->count;
2763        tx_ring->next_to_clean = i;
2764
2765        igc_update_tx_stats(q_vector, total_packets, total_bytes);
2766
2767        if (tx_ring->xsk_pool) {
2768                if (xsk_frames)
2769                        xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
2770                if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
2771                        xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
2772                igc_xdp_xmit_zc(tx_ring);
2773        }
2774
2775        if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
2776                struct igc_hw *hw = &adapter->hw;
2777
2778                /* Detect a transmit hang in hardware; this serializes the
2779                 * check with the clearing of time_stamp and the movement of i
2780                 */
2781                clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
2782                if (tx_buffer->next_to_watch &&
2783                    time_after(jiffies, tx_buffer->time_stamp +
2784                    (adapter->tx_timeout_factor * HZ)) &&
2785                    !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
2786                        /* detected Tx unit hang */
2787                        netdev_err(tx_ring->netdev,
2788                                   "Detected Tx Unit Hang\n"
2789                                   "  Tx Queue             <%d>\n"
2790                                   "  TDH                  <%x>\n"
2791                                   "  TDT                  <%x>\n"
2792                                   "  next_to_use          <%x>\n"
2793                                   "  next_to_clean        <%x>\n"
2794                                   "buffer_info[next_to_clean]\n"
2795                                   "  time_stamp           <%lx>\n"
2796                                   "  next_to_watch        <%p>\n"
2797                                   "  jiffies              <%lx>\n"
2798                                   "  desc.status          <%x>\n",
2799                                   tx_ring->queue_index,
2800                                   rd32(IGC_TDH(tx_ring->reg_idx)),
2801                                   readl(tx_ring->tail),
2802                                   tx_ring->next_to_use,
2803                                   tx_ring->next_to_clean,
2804                                   tx_buffer->time_stamp,
2805                                   tx_buffer->next_to_watch,
2806                                   jiffies,
2807                                   tx_buffer->next_to_watch->wb.status);
2808                        netif_stop_subqueue(tx_ring->netdev,
2809                                            tx_ring->queue_index);
2810
2811                        /* we are about to reset, no point in enabling stuff */
2812                        return true;
2813                }
2814        }
2815
2816#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
2817        if (unlikely(total_packets &&
2818                     netif_carrier_ok(tx_ring->netdev) &&
2819                     igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
2820                /* Make sure that anybody stopping the queue after this
2821                 * sees the new next_to_clean.
2822                 */
2823                smp_mb();
2824                if (__netif_subqueue_stopped(tx_ring->netdev,
2825                                             tx_ring->queue_index) &&
2826                    !(test_bit(__IGC_DOWN, &adapter->state))) {
2827                        netif_wake_subqueue(tx_ring->netdev,
2828                                            tx_ring->queue_index);
2829
2830                        u64_stats_update_begin(&tx_ring->tx_syncp);
2831                        tx_ring->tx_stats.restart_queue++;
2832                        u64_stats_update_end(&tx_ring->tx_syncp);
2833                }
2834        }
2835
2836        return !!budget;
2837}
2838
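/* Scan the RAL/RAH receive address registers for an enabled entry matching
 * the given address and filter type; return its index, or -1 if not found.
 */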
2839static int igc_find_mac_filter(struct igc_adapter *adapter,
2840                               enum igc_mac_filter_type type, const u8 *addr)
2841{
2842        struct igc_hw *hw = &adapter->hw;
2843        int max_entries = hw->mac.rar_entry_count;
2844        u32 ral, rah;
2845        int i;
2846
2847        for (i = 0; i < max_entries; i++) {
2848                ral = rd32(IGC_RAL(i));
2849                rah = rd32(IGC_RAH(i));
2850
2851                if (!(rah & IGC_RAH_AV))
2852                        continue;
2853                if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type)
2854                        continue;
2855                if ((rah & IGC_RAH_RAH_MASK) !=
2856                    le16_to_cpup((__le16 *)(addr + 4)))
2857                        continue;
2858                if (ral != le32_to_cpup((__le32 *)(addr)))
2859                        continue;
2860
2861                return i;
2862        }
2863
2864        return -1;
2865}
2866
2867static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
2868{
2869        struct igc_hw *hw = &adapter->hw;
2870        int max_entries = hw->mac.rar_entry_count;
2871        u32 rah;
2872        int i;
2873
2874        for (i = 0; i < max_entries; i++) {
2875                rah = rd32(IGC_RAH(i));
2876
2877                if (!(rah & IGC_RAH_AV))
2878                        return i;
2879        }
2880
2881        return -1;
2882}
2883
2884/**
2885 * igc_add_mac_filter() - Add MAC address filter
2886 * @adapter: Pointer to adapter where the filter should be added
2887 * @type: MAC address filter type (source or destination)
2888 * @addr: MAC address
2889 * @queue: If non-negative, queue assignment feature is enabled and frames
2890 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
2891 *         assignment is disabled.
2892 *
2893 * Return: 0 in case of success, negative errno code otherwise.
2894 */
2895static int igc_add_mac_filter(struct igc_adapter *adapter,
2896                              enum igc_mac_filter_type type, const u8 *addr,
2897                              int queue)
2898{
2899        struct net_device *dev = adapter->netdev;
2900        int index;
2901
2902        index = igc_find_mac_filter(adapter, type, addr);
2903        if (index >= 0)
2904                goto update_filter;
2905
2906        index = igc_get_avail_mac_filter_slot(adapter);
2907        if (index < 0)
2908                return -ENOSPC;
2909
2910        netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n",
2911                   index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
2912                   addr, queue);
2913
2914update_filter:
2915        igc_set_mac_filter_hw(adapter, index, type, addr, queue);
2916        return 0;
2917}
2918
2919/**
2920 * igc_del_mac_filter() - Delete MAC address filter
2921 * @adapter: Pointer to adapter where the filter should be deleted from
2922 * @type: MAC address filter type (source or destination)
2923 * @addr: MAC address
2924 */
2925static void igc_del_mac_filter(struct igc_adapter *adapter,
2926                               enum igc_mac_filter_type type, const u8 *addr)
2927{
2928        struct net_device *dev = adapter->netdev;
2929        int index;
2930
2931        index = igc_find_mac_filter(adapter, type, addr);
2932        if (index < 0)
2933                return;
2934
2935        if (index == 0) {
2936                /* If this is the default filter, we don't actually delete it.
2937                 * We just reset to its default value i.e. disable queue
2938                 * assignment.
2939                 */
2940                netdev_dbg(dev, "Disable default MAC filter queue assignment");
2941
2942                igc_set_mac_filter_hw(adapter, 0, type, addr, -1);
2943        } else {
2944                netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n",
2945                           index,
2946                           type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
2947                           addr);
2948
2949                igc_clear_mac_filter_hw(adapter, index);
2950        }
2951}
2952
2953/**
2954 * igc_add_vlan_prio_filter() - Add VLAN priority filter
2955 * @adapter: Pointer to adapter where the filter should be added
2956 * @prio: VLAN priority value
2957 * @queue: Queue number which matching frames are assigned to
2958 *
2959 * Return: 0 in case of success, negative errno code otherwise.
2960 */
2961static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
2962                                    int queue)
2963{
2964        struct net_device *dev = adapter->netdev;
2965        struct igc_hw *hw = &adapter->hw;
2966        u32 vlanpqf;
2967
2968        vlanpqf = rd32(IGC_VLANPQF);
2969
2970        if (vlanpqf & IGC_VLANPQF_VALID(prio)) {
2971                netdev_dbg(dev, "VLAN priority filter already in use\n");
2972                return -EEXIST;
2973        }
2974
2975        vlanpqf |= IGC_VLANPQF_QSEL(prio, queue);
2976        vlanpqf |= IGC_VLANPQF_VALID(prio);
2977
2978        wr32(IGC_VLANPQF, vlanpqf);
2979
2980        netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n",
2981                   prio, queue);
2982        return 0;
2983}
2984
2985/**
2986 * igc_del_vlan_prio_filter() - Delete VLAN priority filter
2987 * @adapter: Pointer to adapter where the filter should be deleted from
2988 * @prio: VLAN priority value
2989 */
2990static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
2991{
2992        struct igc_hw *hw = &adapter->hw;
2993        u32 vlanpqf;
2994
2995        vlanpqf = rd32(IGC_VLANPQF);
2996
2997        vlanpqf &= ~IGC_VLANPQF_VALID(prio);
2998        vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK);
2999
3000        wr32(IGC_VLANPQF, vlanpqf);
3001
3002        netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n",
3003                   prio);
3004}
3005
3006static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter)
3007{
3008        struct igc_hw *hw = &adapter->hw;
3009        int i;
3010
3011        for (i = 0; i < MAX_ETYPE_FILTER; i++) {
3012                u32 etqf = rd32(IGC_ETQF(i));
3013
3014                if (!(etqf & IGC_ETQF_FILTER_ENABLE))
3015                        return i;
3016        }
3017
3018        return -1;
3019}
3020
3021/**
3022 * igc_add_etype_filter() - Add ethertype filter
3023 * @adapter: Pointer to adapter where the filter should be added
3024 * @etype: Ethertype value
3025 * @queue: If non-negative, queue assignment feature is enabled and frames
3026 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
3027 *         assignment is disabled.
3028 *
3029 * Return: 0 in case of success, negative errno code otherwise.
3030 */
3031static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype,
3032                                int queue)
3033{
3034        struct igc_hw *hw = &adapter->hw;
3035        int index;
3036        u32 etqf;
3037
3038        index = igc_get_avail_etype_filter_slot(adapter);
3039        if (index < 0)
3040                return -ENOSPC;
3041
3042        etqf = rd32(IGC_ETQF(index));
3043
3044        etqf &= ~IGC_ETQF_ETYPE_MASK;
3045        etqf |= etype;
3046
3047        if (queue >= 0) {
3048                etqf &= ~IGC_ETQF_QUEUE_MASK;
3049                etqf |= (queue << IGC_ETQF_QUEUE_SHIFT);
3050                etqf |= IGC_ETQF_QUEUE_ENABLE;
3051        }
3052
3053        etqf |= IGC_ETQF_FILTER_ENABLE;
3054
3055        wr32(IGC_ETQF(index), etqf);
3056
3057        netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n",
3058                   etype, queue);
3059        return 0;
3060}
3061
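/* Return the index of the ETQF register already programmed with @etype, or
 * -1 if no ethertype filter matches.
 */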
3062static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
3063{
3064        struct igc_hw *hw = &adapter->hw;
3065        int i;
3066
3067        for (i = 0; i < MAX_ETYPE_FILTER; i++) {
3068                u32 etqf = rd32(IGC_ETQF(i));
3069
3070                if ((etqf & IGC_ETQF_ETYPE_MASK) == etype)
3071                        return i;
3072        }
3073
3074        return -1;
3075}
3076
3077/**
3078 * igc_del_etype_filter() - Delete ethertype filter
3079 * @adapter: Pointer to adapter where the filter should be deleted from
3080 * @etype: Ethertype value
3081 */
3082static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
3083{
3084        struct igc_hw *hw = &adapter->hw;
3085        int index;
3086
3087        index = igc_find_etype_filter(adapter, etype);
3088        if (index < 0)
3089                return;
3090
3091        wr32(IGC_ETQF(index), 0);
3092
3093        netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n",
3094                   etype);
3095}
3096
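/* Program FHFTSL to select the bank of eight flex filters containing
 * input->index and return the matching FHFT/FHFT_EXT register base via @fhft.
 */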
3097static int igc_flex_filter_select(struct igc_adapter *adapter,
3098                                  struct igc_flex_filter *input,
3099                                  u32 *fhft)
3100{
3101        struct igc_hw *hw = &adapter->hw;
3102        u8 fhft_index;
3103        u32 fhftsl;
3104
3105        if (input->index >= MAX_FLEX_FILTER) {
3106                dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n");
3107                return -EINVAL;
3108        }
3109
3110        /* Indirect table select register */
3111        fhftsl = rd32(IGC_FHFTSL);
3112        fhftsl &= ~IGC_FHFTSL_FTSL_MASK;
3113        switch (input->index) {
3114        case 0 ... 7:
3115                fhftsl |= 0x00;
3116                break;
3117        case 8 ... 15:
3118                fhftsl |= 0x01;
3119                break;
3120        case 16 ... 23:
3121                fhftsl |= 0x02;
3122                break;
3123        case 24 ... 31:
3124                fhftsl |= 0x03;
3125                break;
3126        }
3127        wr32(IGC_FHFTSL, fhftsl);
3128
3129        /* Normalize index down to host table register */
3130        fhft_index = input->index % 8;
3131
3132        *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) :
3133                IGC_FHFT_EXT(fhft_index - 4);
3134
3135        return 0;
3136}
3137
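/* Low-level write of a flex filter: program the queuing/config word, the
 * 128 bytes of match data and 128 bits of mask into the selected filter
 * table, then enable the filter via WUFC (filters 0-7) or WUFC_EXT (the rest).
 */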
3138static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
3139                                    struct igc_flex_filter *input)
3140{
3141        struct device *dev = &adapter->pdev->dev;
3142        struct igc_hw *hw = &adapter->hw;
3143        u8 *data = input->data;
3144        u8 *mask = input->mask;
3145        u32 queuing;
3146        u32 fhft;
3147        u32 wufc;
3148        int ret;
3149        int i;
3150
3151        /* Length has to be aligned to 8. Otherwise the filter will fail. Bail
3152         * out early to avoid surprises later.
3153         */
3154        if (input->length % 8 != 0) {
3155                dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n");
3156                return -EINVAL;
3157        }
3158
3159        /* Select corresponding flex filter register and get base for host table. */
3160        ret = igc_flex_filter_select(adapter, input, &fhft);
3161        if (ret)
3162                return ret;
3163
3164        /* When adding a filter, globally disable the flex filter feature first,
3165         * as recommended by the datasheet.
3166         */
3167        wufc = rd32(IGC_WUFC);
3168        wufc &= ~IGC_WUFC_FLEX_HQ;
3169        wr32(IGC_WUFC, wufc);
3170
3171        /* Configure filter */
3172        queuing = input->length & IGC_FHFT_LENGTH_MASK;
3173        queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK;
3174        queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK;
3175
3176        if (input->immediate_irq)
3177                queuing |= IGC_FHFT_IMM_INT;
3178
3179        if (input->drop)
3180                queuing |= IGC_FHFT_DROP;
3181
3182        wr32(fhft + 0xFC, queuing);
3183
3184        /* Write data (128 byte) and mask (128 bit) */
3185        for (i = 0; i < 16; ++i) {
3186                const size_t data_idx = i * 8;
3187                const size_t row_idx = i * 16;
3188                u32 dw0 =
3189                        (data[data_idx + 0] << 0) |
3190                        (data[data_idx + 1] << 8) |
3191                        (data[data_idx + 2] << 16) |
3192                        (data[data_idx + 3] << 24);
3193                u32 dw1 =
3194                        (data[data_idx + 4] << 0) |
3195                        (data[data_idx + 5] << 8) |
3196                        (data[data_idx + 6] << 16) |
3197                        (data[data_idx + 7] << 24);
3198                u32 tmp;
3199
3200                /* Write row: dw0, dw1 and mask */
3201                wr32(fhft + row_idx, dw0);
3202                wr32(fhft + row_idx + 4, dw1);
3203
3204                /* mask is only valid for MASK(7, 0) */
3205                tmp = rd32(fhft + row_idx + 8);
3206                tmp &= ~GENMASK(7, 0);
3207                tmp |= mask[i];
3208                wr32(fhft + row_idx + 8, tmp);
3209        }
3210
3211        /* Enable filter. */
3212        wufc |= IGC_WUFC_FLEX_HQ;
3213        if (input->index >= 8) {
3214                /* Filters 0-7 are enabled via WUFC; filters 8-31 via WUFC_EXT. */
3215                u32 wufc_ext = rd32(IGC_WUFC_EXT);
3216
3217                wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8));
3218
3219                wr32(IGC_WUFC_EXT, wufc_ext);
3220        } else {
3221                wufc |= (IGC_WUFC_FLX0 << input->index);
3222        }
3223        wr32(IGC_WUFC, wufc);
3224
3225        dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n",
3226                input->index);
3227
3228        return 0;
3229}
3230
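/* Copy @len bytes from @src into the flex filter data at @offset and set the
 * corresponding per-byte mask bits; if @mask is given, only bytes with a
 * non-zero mask byte participate in the match.
 */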
3231static void igc_flex_filter_add_field(struct igc_flex_filter *flex,
3232                                      const void *src, unsigned int offset,
3233                                      size_t len, const void *mask)
3234{
3235        int i;
3236
3237        /* data */
3238        memcpy(&flex->data[offset], src, len);
3239
3240        /* mask */
3241        for (i = 0; i < len; ++i) {
3242                const unsigned int idx = i + offset;
3243                const u8 *ptr = mask;
3244
3245                if (mask) {
3246                        if (ptr[i] & 0xff)
3247                                flex->mask[idx / 8] |= BIT(idx % 8);
3248
3249                        continue;
3250                }
3251
3252                flex->mask[idx / 8] |= BIT(idx % 8);
3253        }
3254}
3255
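/* Return the first flex filter index not enabled in WUFC/WUFC_EXT, or -ENOSPC
 * if all slots are in use.
 */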
3256static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter)
3257{
3258        struct igc_hw *hw = &adapter->hw;
3259        u32 wufc, wufc_ext;
3260        int i;
3261
3262        wufc = rd32(IGC_WUFC);
3263        wufc_ext = rd32(IGC_WUFC_EXT);
3264
3265        for (i = 0; i < MAX_FLEX_FILTER; i++) {
3266                if (i < 8) {
3267                        if (!(wufc & (IGC_WUFC_FLX0 << i)))
3268                                return i;
3269                } else {
3270                        if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8))))
3271                                return i;
3272                }
3273        }
3274
3275        return -ENOSPC;
3276}
3277
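/* Report whether any flex filter enable bit is set in WUFC or WUFC_EXT. */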
3278static bool igc_flex_filter_in_use(struct igc_adapter *adapter)
3279{
3280        struct igc_hw *hw = &adapter->hw;
3281        u32 wufc, wufc_ext;
3282
3283        wufc = rd32(IGC_WUFC);
3284        wufc_ext = rd32(IGC_WUFC_EXT);
3285
3286        if (wufc & IGC_WUFC_FILTER_MASK)
3287                return true;
3288
3289        if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK)
3290                return true;
3291
3292        return false;
3293}
3294
3295static int igc_add_flex_filter(struct igc_adapter *adapter,
3296                               struct igc_nfc_rule *rule)
3297{
3298        struct igc_flex_filter flex = { };
3299        struct igc_nfc_filter *filter = &rule->filter;
3300        unsigned int eth_offset, user_offset;
3301        int ret, index;
3302        bool vlan;
3303
3304        index = igc_find_avail_flex_filter_slot(adapter);
3305        if (index < 0)
3306                return -ENOSPC;
3307
3308        /* Construct the flex filter:
3309         *  -> dest_mac [6]
3310         *  -> src_mac [6]
3311         *  -> tpid [2]
3312         *  -> vlan tci [2]
3313         *  -> ether type [2]
3314         *  -> user data [8]
3315         *  => 26 bytes total, padded to a length of 32 (must be a multiple of 8)
3316         */
3317        flex.index    = index;
3318        flex.length   = 32;
3319        flex.rx_queue = rule->action;
3320
3321        vlan = rule->filter.vlan_tci || rule->filter.vlan_etype;
3322        eth_offset = vlan ? 16 : 12;
3323        user_offset = vlan ? 18 : 14;
3324
3325        /* Add destination MAC */
3326        if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3327                igc_flex_filter_add_field(&flex, &filter->dst_addr, 0,
3328                                          ETH_ALEN, NULL);
3329
3330        /* Add source MAC */
3331        if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3332                igc_flex_filter_add_field(&flex, &filter->src_addr, 6,
3333                                          ETH_ALEN, NULL);
3334
3335        /* Add VLAN etype */
3336        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE)
3337                igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12,
3338                                          sizeof(filter->vlan_etype),
3339                                          NULL);
3340
3341        /* Add VLAN TCI */
3342        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
3343                igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14,
3344                                          sizeof(filter->vlan_tci), NULL);
3345
3346        /* Add Ether type */
3347        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3348                __be16 etype = cpu_to_be16(filter->etype);
3349
3350                igc_flex_filter_add_field(&flex, &etype, eth_offset,
3351                                          sizeof(etype), NULL);
3352        }
3353
3354        /* Add user data */
3355        if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA)
3356                igc_flex_filter_add_field(&flex, &filter->user_data,
3357                                          user_offset,
3358                                          sizeof(filter->user_data),
3359                                          filter->user_mask);
3360
3361        /* Write the filter to the hardware and enable it. */
3362        ret = igc_write_flex_filter_ll(adapter, &flex);
3363        if (ret)
3364                return ret;
3365
3366        filter->flex_index = index;
3367
3368        return 0;
3369}
3370
3371static void igc_del_flex_filter(struct igc_adapter *adapter,
3372                                u16 reg_index)
3373{
3374        struct igc_hw *hw = &adapter->hw;
3375        u32 wufc;
3376
3377        /* Just disable the filter. The filter table itself is kept
3378         * intact. A subsequent igc_add_flex_filter() will simply overwrite
3379         * the old data.
3380         */
3381        if (reg_index >= 8) {
3382                u32 wufc_ext = rd32(IGC_WUFC_EXT);
3383
3384                wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8));
3385                wr32(IGC_WUFC_EXT, wufc_ext);
3386        } else {
3387                wufc = rd32(IGC_WUFC);
3388
3389                wufc &= ~(IGC_WUFC_FLX0 << reg_index);
3390                wr32(IGC_WUFC, wufc);
3391        }
3392
3393        if (igc_flex_filter_in_use(adapter))
3394                return;
3395
3396        /* No filters are in use, we may disable flex filters */
3397        wufc = rd32(IGC_WUFC);
3398        wufc &= ~IGC_WUFC_FLEX_HQ;
3399        wr32(IGC_WUFC, wufc);
3400}
3401
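/* Program the hardware filters (flex, ethertype, MAC address and VLAN
 * priority) required by @rule.
 */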
3402static int igc_enable_nfc_rule(struct igc_adapter *adapter,
3403                               struct igc_nfc_rule *rule)
3404{
3405        int err;
3406
3407        if (rule->flex) {
3408                return igc_add_flex_filter(adapter, rule);
3409        }
3410
3411        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3412                err = igc_add_etype_filter(adapter, rule->filter.etype,
3413                                           rule->action);
3414                if (err)
3415                        return err;
3416        }
3417
3418        if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
3419                err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3420                                         rule->filter.src_addr, rule->action);
3421                if (err)
3422                        return err;
3423        }
3424
3425        if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
3426                err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3427                                         rule->filter.dst_addr, rule->action);
3428                if (err)
3429                        return err;
3430        }
3431
3432        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3433                int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3434                           VLAN_PRIO_SHIFT;
3435
3436                err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
3437                if (err)
3438                        return err;
3439        }
3440
3441        return 0;
3442}
3443
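/* Remove the hardware filters previously programmed for @rule. */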
3444static void igc_disable_nfc_rule(struct igc_adapter *adapter,
3445                                 const struct igc_nfc_rule *rule)
3446{
3447        if (rule->flex) {
3448                igc_del_flex_filter(adapter, rule->filter.flex_index);
3449                return;
3450        }
3451
3452        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
3453                igc_del_etype_filter(adapter, rule->filter.etype);
3454
3455        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3456                int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3457                           VLAN_PRIO_SHIFT;
3458
3459                igc_del_vlan_prio_filter(adapter, prio);
3460        }
3461
3462        if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3463                igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3464                                   rule->filter.src_addr);
3465
3466        if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3467                igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3468                                   rule->filter.dst_addr);
3469}
3470
3471/**
3472 * igc_get_nfc_rule() - Get NFC rule
3473 * @adapter: Pointer to adapter
3474 * @location: Rule location
3475 *
3476 * Context: Expects adapter->nfc_rule_lock to be held by caller.
3477 *
3478 * Return: Pointer to NFC rule at @location. If not found, NULL.
3479 */
3480struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
3481                                      u32 location)
3482{
3483        struct igc_nfc_rule *rule;
3484
3485        list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
3486                if (rule->location == location)
3487                        return rule;
3488                if (rule->location > location)
3489                        break;
3490        }
3491
3492        return NULL;
3493}
3494
3495/**
3496 * igc_del_nfc_rule() - Delete NFC rule
3497 * @adapter: Pointer to adapter
3498 * @rule: Pointer to rule to be deleted
3499 *
3500 * Disable NFC rule in hardware and delete it from adapter.
3501 *
3502 * Context: Expects adapter->nfc_rule_lock to be held by caller.
3503 */
3504void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3505{
3506        igc_disable_nfc_rule(adapter, rule);
3507
3508        list_del(&rule->list);
3509        adapter->nfc_rule_count--;
3510
3511        kfree(rule);
3512}
3513
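/* Delete all NFC rules from hardware and from the adapter's rule list. */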
3514static void igc_flush_nfc_rules(struct igc_adapter *adapter)
3515{
3516        struct igc_nfc_rule *rule, *tmp;
3517
3518        mutex_lock(&adapter->nfc_rule_lock);
3519
3520        list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list)
3521                igc_del_nfc_rule(adapter, rule);
3522
3523        mutex_unlock(&adapter->nfc_rule_lock);
3524}
3525
3526/**
3527 * igc_add_nfc_rule() - Add NFC rule
3528 * @adapter: Pointer to adapter
3529 * @rule: Pointer to rule to be added
3530 *
3531 * Enable NFC rule in hardware and add it to adapter.
3532 *
3533 * Context: Expects adapter->nfc_rule_lock to be held by caller.
3534 *
3535 * Return: 0 on success, negative errno on failure.
3536 */
3537int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3538{
3539        struct igc_nfc_rule *pred, *cur;
3540        int err;
3541
3542        err = igc_enable_nfc_rule(adapter, rule);
3543        if (err)
3544                return err;
3545
3546        pred = NULL;
3547        list_for_each_entry(cur, &adapter->nfc_rule_list, list) {
3548                if (cur->location >= rule->location)
3549                        break;
3550                pred = cur;
3551        }
3552
3553        list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list);
3554        adapter->nfc_rule_count++;
3555        return 0;
3556}
3557
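/* Re-program all NFC rules into hardware, e.g. after a device reset. */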
3558static void igc_restore_nfc_rules(struct igc_adapter *adapter)
3559{
3560        struct igc_nfc_rule *rule;
3561
3562        mutex_lock(&adapter->nfc_rule_lock);
3563
3564        list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
3565                igc_enable_nfc_rule(adapter, rule);
3566
3567        mutex_unlock(&adapter->nfc_rule_lock);
3568}
3569
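/* Callbacks used by __dev_uc_sync() to add/remove secondary unicast addresses
 * as destination MAC filters without queue assignment.
 */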
3570static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
3571{
3572        struct igc_adapter *adapter = netdev_priv(netdev);
3573
3574        return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1);
3575}
3576
3577static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
3578{
3579        struct igc_adapter *adapter = netdev_priv(netdev);
3580
3581        igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
3582        return 0;
3583}
3584
3585/**
3586 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3587 * @netdev: network interface device structure
3588 *
3589 * The set_rx_mode entry point is called whenever the unicast or multicast
3590 * address lists or the network interface flags are updated.  This routine is
3591 * responsible for configuring the hardware for proper unicast, multicast,
3592 * promiscuous mode, and all-multi behavior.
3593 */
3594static void igc_set_rx_mode(struct net_device *netdev)
3595{
3596        struct igc_adapter *adapter = netdev_priv(netdev);
3597        struct igc_hw *hw = &adapter->hw;
3598        u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
3599        int count;
3600
3601        /* Check for Promiscuous and All Multicast modes */
3602        if (netdev->flags & IFF_PROMISC) {
3603                rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
3604        } else {
3605                if (netdev->flags & IFF_ALLMULTI) {
3606                        rctl |= IGC_RCTL_MPE;
3607                } else {
3608                        /* Write addresses to the MTA; if the attempt fails,
3609                         * fall back to multicast promiscuous mode so that we
3610                         * can at least receive multicast traffic
3611                         */
3612                        count = igc_write_mc_addr_list(netdev);
3613                        if (count < 0)
3614                                rctl |= IGC_RCTL_MPE;
3615                }
3616        }
3617
3618        /* Write addresses to available RAR registers, if there is not
3619         * sufficient space to store all the addresses then enable
3620         * unicast promiscuous mode
3621         */
3622        if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
3623                rctl |= IGC_RCTL_UPE;
3624
3625        /* update state of unicast and multicast */
3626        rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
3627        wr32(IGC_RCTL, rctl);
3628
3629#if (PAGE_SIZE < 8192)
3630        if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
3631                rlpml = IGC_MAX_FRAME_BUILD_SKB;
3632#endif
3633        wr32(IGC_RLPML, rlpml);
3634}
3635
3636/**
3637 * igc_configure - configure the hardware for RX and TX
3638 * @adapter: private board structure
3639 */
3640static void igc_configure(struct igc_adapter *adapter)
3641{
3642        struct net_device *netdev = adapter->netdev;
3643        int i = 0;
3644
3645        igc_get_hw_control(adapter);
3646        igc_set_rx_mode(netdev);
3647
3648        igc_restore_vlan(adapter);
3649
3650        igc_setup_tctl(adapter);
3651        igc_setup_mrqc(adapter);
3652        igc_setup_rctl(adapter);
3653
3654        igc_set_default_mac_filter(adapter);
3655        igc_restore_nfc_rules(adapter);
3656
3657        igc_configure_tx(adapter);
3658        igc_configure_rx(adapter);
3659
3660        igc_rx_fifo_flush_base(&adapter->hw);
3661
3662        /* call igc_desc_unused which always leaves
3663         * at least 1 descriptor unused to make sure
3664         * next_to_use != next_to_clean
3665         */
3666        for (i = 0; i < adapter->num_rx_queues; i++) {
3667                struct igc_ring *ring = adapter->rx_ring[i];
3668
3669                if (ring->xsk_pool)
3670                        igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
3671                else
3672                        igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
3673        }
3674}
3675
3676/**
3677 * igc_write_ivar - configure ivar for given MSI-X vector
3678 * @hw: pointer to the HW structure
3679 * @msix_vector: vector number we are allocating to a given ring
3680 * @index: row index of IVAR register to write within IVAR table
 3681 * @offset: column offset in IVAR, should be a multiple of 8
3682 *
3683 * The IVAR table consists of 2 columns,
 3684 * each containing a cause allocation for an Rx and a Tx ring, and a
3685 * variable number of rows depending on the number of queues supported.
3686 */
3687static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
3688                           int index, int offset)
3689{
3690        u32 ivar = array_rd32(IGC_IVAR0, index);
3691
3692        /* clear any bits that are currently set */
3693        ivar &= ~((u32)0xFF << offset);
3694
3695        /* write vector and valid bit */
3696        ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
3697
3698        array_wr32(IGC_IVAR0, index, ivar);
3699}
3700
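/* Map the Rx/Tx rings handled by @q_vector to @msix_vector in the IVAR table
 * and record the vector's EIMS bit for later interrupt enabling.
 */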
3701static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
3702{
3703        struct igc_adapter *adapter = q_vector->adapter;
3704        struct igc_hw *hw = &adapter->hw;
3705        int rx_queue = IGC_N0_QUEUE;
3706        int tx_queue = IGC_N0_QUEUE;
3707
3708        if (q_vector->rx.ring)
3709                rx_queue = q_vector->rx.ring->reg_idx;
3710        if (q_vector->tx.ring)
3711                tx_queue = q_vector->tx.ring->reg_idx;
3712
3713        switch (hw->mac.type) {
3714        case igc_i225:
3715                if (rx_queue > IGC_N0_QUEUE)
3716                        igc_write_ivar(hw, msix_vector,
3717                                       rx_queue >> 1,
3718                                       (rx_queue & 0x1) << 4);
3719                if (tx_queue > IGC_N0_QUEUE)
3720                        igc_write_ivar(hw, msix_vector,
3721                                       tx_queue >> 1,
3722                                       ((tx_queue & 0x1) << 4) + 8);
3723                q_vector->eims_value = BIT(msix_vector);
3724                break;
3725        default:
3726                WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
3727                break;
3728        }
3729
3730        /* add q_vector eims value to global eims_enable_mask */
3731        adapter->eims_enable_mask |= q_vector->eims_value;
3732
3733        /* configure q_vector to set itr on first interrupt */
3734        q_vector->set_itr = 1;
3735}
3736
3737/**
3738 * igc_configure_msix - Configure MSI-X hardware
3739 * @adapter: Pointer to adapter structure
3740 *
3741 * igc_configure_msix sets up the hardware to properly
3742 * generate MSI-X interrupts.
3743 */
3744static void igc_configure_msix(struct igc_adapter *adapter)
3745{
3746        struct igc_hw *hw = &adapter->hw;
3747        int i, vector = 0;
3748        u32 tmp;
3749
3750        adapter->eims_enable_mask = 0;
3751
3752        /* set vector for other causes, i.e. link changes */
3753        switch (hw->mac.type) {
3754        case igc_i225:
3755                /* Turn on MSI-X capability first, or our settings
3756                 * won't stick.  And it will take days to debug.
3757                 */
3758                wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
3759                     IGC_GPIE_PBA | IGC_GPIE_EIAME |
3760                     IGC_GPIE_NSICR);
3761
3762                /* enable msix_other interrupt */
3763                adapter->eims_other = BIT(vector);
3764                tmp = (vector++ | IGC_IVAR_VALID) << 8;
3765
3766                wr32(IGC_IVAR_MISC, tmp);
3767                break;
3768        default:
3769                /* do nothing, since nothing else supports MSI-X */
3770                break;
3771        } /* switch (hw->mac.type) */
3772
3773        adapter->eims_enable_mask |= adapter->eims_other;
3774
3775        for (i = 0; i < adapter->num_q_vectors; i++)
3776                igc_assign_vector(adapter->q_vector[i], vector++);
3777
3778        wrfl();
3779}
3780
3781/**
3782 * igc_irq_enable - Enable default interrupt generation settings
3783 * @adapter: board private structure
3784 */
3785static void igc_irq_enable(struct igc_adapter *adapter)
3786{
3787        struct igc_hw *hw = &adapter->hw;
3788
3789        if (adapter->msix_entries) {
3790                u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
3791                u32 regval = rd32(IGC_EIAC);
3792
3793                wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
3794                regval = rd32(IGC_EIAM);
3795                wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
3796                wr32(IGC_EIMS, adapter->eims_enable_mask);
3797                wr32(IGC_IMS, ims);
3798        } else {
3799                wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3800                wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3801        }
3802}
3803
3804/**
3805 * igc_irq_disable - Mask off interrupt generation on the NIC
3806 * @adapter: board private structure
3807 */
3808static void igc_irq_disable(struct igc_adapter *adapter)
3809{
3810        struct igc_hw *hw = &adapter->hw;
3811
3812        if (adapter->msix_entries) {
3813                u32 regval = rd32(IGC_EIAM);
3814
3815                wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
3816                wr32(IGC_EIMC, adapter->eims_enable_mask);
3817                regval = rd32(IGC_EIAC);
3818                wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
3819        }
3820
3821        wr32(IGC_IAM, 0);
3822        wr32(IGC_IMC, ~0);
3823        wrfl();
3824
3825        if (adapter->msix_entries) {
3826                int vector = 0, i;
3827
3828                synchronize_irq(adapter->msix_entries[vector++].vector);
3829
3830                for (i = 0; i < adapter->num_q_vectors; i++)
3831                        synchronize_irq(adapter->msix_entries[vector++].vector);
3832        } else {
3833                synchronize_irq(adapter->pdev->irq);
3834        }
3835}
3836
3837void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
3838                              const u32 max_rss_queues)
3839{
3840        /* Determine if we need to pair queues: if rss_queues is more than
3841         * half of max_rss_queues, pair the queues in order to conserve
3842         * interrupts due to limited supply.
3843         */
3844        if (adapter->rss_queues > (max_rss_queues / 2))
3845                adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
3846        else
3847                adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
3848}
3849
3850unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
3851{
3852        return IGC_MAX_RX_QUEUES;
3853}
3854
3855static void igc_init_queue_configuration(struct igc_adapter *adapter)
3856{
3857        u32 max_rss_queues;
3858
3859        max_rss_queues = igc_get_max_rss_queues(adapter);
3860        adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
3861
3862        igc_set_flag_queue_pairs(adapter, max_rss_queues);
3863}
3864
3865/**
3866 * igc_reset_q_vector - Reset config for interrupt vector
3867 * @adapter: board private structure to initialize
3868 * @v_idx: Index of vector to be reset
3869 *
3870 * If NAPI is enabled it will delete any references to the
3871 * NAPI struct. This is preparation for igc_free_q_vector.
3872 */
3873static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
3874{
3875        struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
3876
3877        /* if we're coming from igc_set_interrupt_capability, the vectors are
3878         * not yet allocated
3879         */
3880        if (!q_vector)
3881                return;
3882
3883        if (q_vector->tx.ring)
3884                adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
3885
3886        if (q_vector->rx.ring)
3887                adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
3888
3889        netif_napi_del(&q_vector->napi);
3890}
3891
3892/**
3893 * igc_free_q_vector - Free memory allocated for specific interrupt vector
3894 * @adapter: board private structure to initialize
3895 * @v_idx: Index of vector to be freed
3896 *
3897 * This function frees the memory allocated to the q_vector.
3898 */
3899static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
3900{
3901        struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
3902
3903        adapter->q_vector[v_idx] = NULL;
3904
3905        /* igc_get_stats64() might access the rings on this vector,
3906         * we must wait a grace period before freeing it.
3907         */
3908        if (q_vector)
3909                kfree_rcu(q_vector, rcu);
3910}
3911
3912/**
3913 * igc_free_q_vectors - Free memory allocated for interrupt vectors
3914 * @adapter: board private structure to initialize
3915 *
3916 * This function frees the memory allocated to the q_vectors.  In addition if
3917 * NAPI is enabled it will delete any references to the NAPI struct prior
3918 * to freeing the q_vector.
3919 */
3920static void igc_free_q_vectors(struct igc_adapter *adapter)
3921{
3922        int v_idx = adapter->num_q_vectors;
3923
3924        adapter->num_tx_queues = 0;
3925        adapter->num_rx_queues = 0;
3926        adapter->num_q_vectors = 0;
3927
3928        while (v_idx--) {
3929                igc_reset_q_vector(adapter, v_idx);
3930                igc_free_q_vector(adapter, v_idx);
3931        }
3932}
3933
3934/**
3935 * igc_update_itr - update the dynamic ITR value based on statistics
3936 * @q_vector: pointer to q_vector
3937 * @ring_container: ring info to update the itr for
3938 *
3939 * Stores a new ITR value based on packets and byte
3940 * counts during the last interrupt.  The advantage of per interrupt
3941 * computation is faster updates and more accurate ITR for the current
3942 * traffic pattern.  Constants in this function were computed
3943 * based on theoretical maximum wire speed and thresholds were set based
3944 * on testing data as well as attempting to minimize response time
3945 * while increasing bulk throughput.
3946 * NOTE: These calculations are only valid when operating in a single-
3947 * queue environment.
3948 */
3949static void igc_update_itr(struct igc_q_vector *q_vector,
3950                           struct igc_ring_container *ring_container)
3951{
3952        unsigned int packets = ring_container->total_packets;
3953        unsigned int bytes = ring_container->total_bytes;
3954        u8 itrval = ring_container->itr;
3955
3956        /* no packets, exit with status unchanged */
3957        if (packets == 0)
3958                return;
3959
3960        switch (itrval) {
3961        case lowest_latency:
3962                /* handle TSO and jumbo frames */
3963                if (bytes / packets > 8000)
3964                        itrval = bulk_latency;
3965                else if ((packets < 5) && (bytes > 512))
3966                        itrval = low_latency;
3967                break;
3968        case low_latency:  /* 50 usec aka 20000 ints/s */
3969                if (bytes > 10000) {
3970                        /* this if handles the TSO accounting */
3971                        if (bytes / packets > 8000)
3972                                itrval = bulk_latency;
3973                        else if ((packets < 10) || ((bytes / packets) > 1200))
3974                                itrval = bulk_latency;
3975                        else if ((packets > 35))
3976                                itrval = lowest_latency;
3977                } else if (bytes / packets > 2000) {
3978                        itrval = bulk_latency;
3979                } else if (packets <= 2 && bytes < 512) {
3980                        itrval = lowest_latency;
3981                }
3982                break;
3983        case bulk_latency: /* 250 usec aka 4000 ints/s */
3984                if (bytes > 25000) {
3985                        if (packets > 35)
3986                                itrval = low_latency;
3987                } else if (bytes < 1500) {
3988                        itrval = low_latency;
3989                }
3990                break;
3991        }
3992
3993        /* clear work counters since we have the values we need */
3994        ring_container->total_bytes = 0;
3995        ring_container->total_packets = 0;
3996
3997        /* write updated itr to ring container */
3998        ring_container->itr = itrval;
3999}
4000
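/* Derive a new ITR value for a single-queue vector from the Tx and Rx latency
 * classes computed by igc_update_itr().
 */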
4001static void igc_set_itr(struct igc_q_vector *q_vector)
4002{
4003        struct igc_adapter *adapter = q_vector->adapter;
4004        u32 new_itr = q_vector->itr_val;
4005        u8 current_itr = 0;
4006
4007        /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4008        switch (adapter->link_speed) {
4009        case SPEED_10:
4010        case SPEED_100:
4011                current_itr = 0;
4012                new_itr = IGC_4K_ITR;
4013                goto set_itr_now;
4014        default:
4015                break;
4016        }
4017
4018        igc_update_itr(q_vector, &q_vector->tx);
4019        igc_update_itr(q_vector, &q_vector->rx);
4020
4021        current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4022
4023        /* conservative mode (itr 3) eliminates the lowest_latency setting */
4024        if (current_itr == lowest_latency &&
4025            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4026            (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4027                current_itr = low_latency;
4028
4029        switch (current_itr) {
4030        /* counts and packets in update_itr are dependent on these numbers */
4031        case lowest_latency:
4032                new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
4033                break;
4034        case low_latency:
4035                new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
4036                break;
4037        case bulk_latency:
4038                new_itr = IGC_4K_ITR;  /* 4,000 ints/sec */
4039                break;
4040        default:
4041                break;
4042        }
4043
4044set_itr_now:
4045        if (new_itr != q_vector->itr_val) {
4046                /* this attempts to bias the interrupt rate towards Bulk
4047                 * by adding intermediate steps when interrupt rate is
4048                 * increasing
4049                 */
4050                new_itr = new_itr > q_vector->itr_val ?
4051                          max((new_itr * q_vector->itr_val) /
4052                          (new_itr + (q_vector->itr_val >> 2)),
4053                          new_itr) : new_itr;
4054                /* Don't write the value here; it resets the adapter's
4055                 * internal timer, and causes us to delay far longer than
4056                 * we should between interrupts.  Instead, we write the ITR
4057                 * value at the beginning of the next interrupt so the timing
4058                 * ends up being correct.
4059                 */
4060                q_vector->itr_val = new_itr;
4061                q_vector->set_itr = 1;
4062        }
4063}
4064
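/* Release MSI-X/MSI resources and reset all queue vector state. */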
4065static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
4066{
4067        int v_idx = adapter->num_q_vectors;
4068
4069        if (adapter->msix_entries) {
4070                pci_disable_msix(adapter->pdev);
4071                kfree(adapter->msix_entries);
4072                adapter->msix_entries = NULL;
4073        } else if (adapter->flags & IGC_FLAG_HAS_MSI) {
4074                pci_disable_msi(adapter->pdev);
4075        }
4076
4077        while (v_idx--)
4078                igc_reset_q_vector(adapter, v_idx);
4079}
4080
4081/**
4082 * igc_set_interrupt_capability - set MSI or MSI-X if supported
4083 * @adapter: Pointer to adapter structure
4084 * @msix: boolean value for MSI-X capability
4085 *
4086 * Attempt to configure interrupts using the best available
4087 * capabilities of the hardware and kernel.
4088 */
4089static void igc_set_interrupt_capability(struct igc_adapter *adapter,
4090                                         bool msix)
4091{
4092        int numvecs, i;
4093        int err;
4094
4095        if (!msix)
4096                goto msi_only;
4097        adapter->flags |= IGC_FLAG_HAS_MSIX;
4098
4099        /* Number of supported queues. */
4100        adapter->num_rx_queues = adapter->rss_queues;
4101
4102        adapter->num_tx_queues = adapter->rss_queues;
4103
4104        /* start with one vector for every Rx queue */
4105        numvecs = adapter->num_rx_queues;
4106
4107        /* if Tx handler is separate add 1 for every Tx queue */
4108        if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
4109                numvecs += adapter->num_tx_queues;
4110
4111        /* store the number of vectors reserved for queues */
4112        adapter->num_q_vectors = numvecs;
4113
4114        /* add 1 vector for link status interrupts */
4115        numvecs++;
4116
4117        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
4118                                        GFP_KERNEL);
4119
4120        if (!adapter->msix_entries)
4121                return;
4122
4123        /* populate entry values */
4124        for (i = 0; i < numvecs; i++)
4125                adapter->msix_entries[i].entry = i;
4126
4127        err = pci_enable_msix_range(adapter->pdev,
4128                                    adapter->msix_entries,
4129                                    numvecs,
4130                                    numvecs);
4131        if (err > 0)
4132                return;
4133
4134        kfree(adapter->msix_entries);
4135        adapter->msix_entries = NULL;
4136
4137        igc_reset_interrupt_capability(adapter);
4138
4139msi_only:
4140        adapter->flags &= ~IGC_FLAG_HAS_MSIX;
4141
4142        adapter->rss_queues = 1;
4143        adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
4144        adapter->num_rx_queues = 1;
4145        adapter->num_tx_queues = 1;
4146        adapter->num_q_vectors = 1;
4147        if (!pci_enable_msi(adapter->pdev))
4148                adapter->flags |= IGC_FLAG_HAS_MSI;
4149}
4150
4151/**
4152 * igc_update_ring_itr - update the dynamic ITR value based on packet size
4153 * @q_vector: pointer to q_vector
4154 *
 4155 * Stores a new ITR value based strictly on packet size.  This
4156 * algorithm is less sophisticated than that used in igc_update_itr,
4157 * due to the difficulty of synchronizing statistics across multiple
4158 * receive rings.  The divisors and thresholds used by this function
4159 * were determined based on theoretical maximum wire speed and testing
4160 * data, in order to minimize response time while increasing bulk
4161 * throughput.
4162 * NOTE: This function is called only when operating in a multiqueue
4163 * receive environment.
4164 */
4165static void igc_update_ring_itr(struct igc_q_vector *q_vector)
4166{
4167        struct igc_adapter *adapter = q_vector->adapter;
4168        int new_val = q_vector->itr_val;
4169        int avg_wire_size = 0;
4170        unsigned int packets;
4171
4172        /* For non-gigabit speeds, just fix the interrupt rate at 4000
4173         * ints/sec - ITR timer value of 120 ticks.
4174         */
4175        switch (adapter->link_speed) {
4176        case SPEED_10:
4177        case SPEED_100:
4178                new_val = IGC_4K_ITR;
4179                goto set_itr_val;
4180        default:
4181                break;
4182        }
4183
4184        packets = q_vector->rx.total_packets;
4185        if (packets)
4186                avg_wire_size = q_vector->rx.total_bytes / packets;
4187
4188        packets = q_vector->tx.total_packets;
4189        if (packets)
4190                avg_wire_size = max_t(u32, avg_wire_size,
4191                                      q_vector->tx.total_bytes / packets);
4192
4193        /* if avg_wire_size isn't set no work was done */
4194        if (!avg_wire_size)
4195                goto clear_counts;
4196
4197        /* Add 24 bytes to size to account for CRC, preamble, and gap */
4198        avg_wire_size += 24;
4199
4200        /* Don't starve jumbo frames */
4201        avg_wire_size = min(avg_wire_size, 3000);
4202
4203        /* Give a little boost to mid-size frames */
4204        if (avg_wire_size > 300 && avg_wire_size < 1200)
4205                new_val = avg_wire_size / 3;
4206        else
4207                new_val = avg_wire_size / 2;
4208
4209        /* conservative mode (itr 3) eliminates the lowest_latency setting */
4210        if (new_val < IGC_20K_ITR &&
4211            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4212            (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4213                new_val = IGC_20K_ITR;
4214
4215set_itr_val:
4216        if (new_val != q_vector->itr_val) {
4217                q_vector->itr_val = new_val;
4218                q_vector->set_itr = 1;
4219        }
4220clear_counts:
4221        q_vector->rx.total_bytes = 0;
4222        q_vector->rx.total_packets = 0;
4223        q_vector->tx.total_bytes = 0;
4224        q_vector->tx.total_packets = 0;
4225}
4226
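/* Re-arm the interrupt for this vector after NAPI polling completes, updating
 * the ITR first when dynamic interrupt moderation is enabled.
 */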
4227static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
4228{
4229        struct igc_adapter *adapter = q_vector->adapter;
4230        struct igc_hw *hw = &adapter->hw;
4231
4232        if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
4233            (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
4234                if (adapter->num_q_vectors == 1)
4235                        igc_set_itr(q_vector);
4236                else
4237                        igc_update_ring_itr(q_vector);
4238        }
4239
4240        if (!test_bit(__IGC_DOWN, &adapter->state)) {
4241                if (adapter->msix_entries)
4242                        wr32(IGC_EIMS, q_vector->eims_value);
4243                else
4244                        igc_irq_enable(adapter);
4245        }
4246}
4247
4248static void igc_add_ring(struct igc_ring *ring,
4249                         struct igc_ring_container *head)
4250{
4251        head->ring = ring;
4252        head->count++;
4253}
4254
4255/**
4256 * igc_cache_ring_register - Descriptor ring to register mapping
4257 * @adapter: board private structure to initialize
4258 *
4259 * Once we know the feature-set enabled for the device, we'll cache
4260 * the register offset the descriptor ring is assigned to.
4261 */
4262static void igc_cache_ring_register(struct igc_adapter *adapter)
4263{
4264        int i = 0, j = 0;
4265
4266        switch (adapter->hw.mac.type) {
4267        case igc_i225:
4268        default:
4269                for (; i < adapter->num_rx_queues; i++)
4270                        adapter->rx_ring[i]->reg_idx = i;
4271                for (; j < adapter->num_tx_queues; j++)
4272                        adapter->tx_ring[j]->reg_idx = j;
4273                break;
4274        }
4275}
4276
4277/**
4278 * igc_poll - NAPI Rx polling callback
4279 * @napi: napi polling structure
4280 * @budget: count of how many packets we should handle
4281 */
4282static int igc_poll(struct napi_struct *napi, int budget)
4283{
4284        struct igc_q_vector *q_vector = container_of(napi,
4285                                                     struct igc_q_vector,
4286                                                     napi);
4287        struct igc_ring *rx_ring = q_vector->rx.ring;
4288        bool clean_complete = true;
4289        int work_done = 0;
4290
4291        if (q_vector->tx.ring)
4292                clean_complete = igc_clean_tx_irq(q_vector, budget);
4293
4294        if (rx_ring) {
4295                int cleaned = rx_ring->xsk_pool ?
4296                              igc_clean_rx_irq_zc(q_vector, budget) :
4297                              igc_clean_rx_irq(q_vector, budget);
4298
4299                work_done += cleaned;
4300                if (cleaned >= budget)
4301                        clean_complete = false;
4302        }
4303
4304        /* If all work not completed, return budget and keep polling */
4305        if (!clean_complete)
4306                return budget;
4307
4308        /* Exit the polling mode, but don't re-enable interrupts if stack might
4309         * poll us due to busy-polling
4310         */
4311        if (likely(napi_complete_done(napi, work_done)))
4312                igc_ring_irq_enable(q_vector);
4313
4314        return min(work_done, budget - 1);
4315}
4316
4317/**
4318 * igc_alloc_q_vector - Allocate memory for a single interrupt vector
4319 * @adapter: board private structure to initialize
4320 * @v_count: q_vectors allocated on adapter, used for ring interleaving
4321 * @v_idx: index of vector in adapter struct
4322 * @txr_count: total number of Tx rings to allocate
4323 * @txr_idx: index of first Tx ring to allocate
4324 * @rxr_count: total number of Rx rings to allocate
4325 * @rxr_idx: index of first Rx ring to allocate
4326 *
4327 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
4328 */
4329static int igc_alloc_q_vector(struct igc_adapter *adapter,
4330                              unsigned int v_count, unsigned int v_idx,
4331                              unsigned int txr_count, unsigned int txr_idx,
4332                              unsigned int rxr_count, unsigned int rxr_idx)
4333{
4334        struct igc_q_vector *q_vector;
4335        struct igc_ring *ring;
4336        int ring_count;
4337
4338        /* igc only supports 1 Tx and/or 1 Rx queue per vector */
4339        if (txr_count > 1 || rxr_count > 1)
4340                return -ENOMEM;
4341
4342        ring_count = txr_count + rxr_count;
4343
4344        /* allocate q_vector and rings */
4345        q_vector = adapter->q_vector[v_idx];
4346        if (!q_vector)
4347                q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
4348                                   GFP_KERNEL);
4349        else
4350                memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
4351        if (!q_vector)
4352                return -ENOMEM;
4353
4354        /* initialize NAPI */
4355        netif_napi_add(adapter->netdev, &q_vector->napi,
4356                       igc_poll, 64);
4357
4358        /* tie q_vector and adapter together */
4359        adapter->q_vector[v_idx] = q_vector;
4360        q_vector->adapter = adapter;
4361
4362        /* initialize work limits */
4363        q_vector->tx.work_limit = adapter->tx_work_limit;
4364
4365        /* initialize ITR configuration */
4366        q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
4367        q_vector->itr_val = IGC_START_ITR;
4368
4369        /* initialize pointer to rings */
4370        ring = q_vector->ring;
4371
4372        /* initialize ITR */
4373        if (rxr_count) {
4374                /* rx or rx/tx vector */
4375                if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
4376                        q_vector->itr_val = adapter->rx_itr_setting;
4377        } else {
4378                /* tx only vector */
4379                if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
4380                        q_vector->itr_val = adapter->tx_itr_setting;
4381        }
4382
4383        if (txr_count) {
4384                /* assign generic ring traits */
4385                ring->dev = &adapter->pdev->dev;
4386                ring->netdev = adapter->netdev;
4387
4388                /* configure backlink on ring */
4389                ring->q_vector = q_vector;
4390
4391                /* update q_vector Tx values */
4392                igc_add_ring(ring, &q_vector->tx);
4393
4394                /* apply Tx specific ring traits */
4395                ring->count = adapter->tx_ring_count;
4396                ring->queue_index = txr_idx;
4397
4398                /* assign ring to adapter */
4399                adapter->tx_ring[txr_idx] = ring;
4400
4401                /* push pointer to next ring */
4402                ring++;
4403        }
4404
4405        if (rxr_count) {
4406                /* assign generic ring traits */
4407                ring->dev = &adapter->pdev->dev;
4408                ring->netdev = adapter->netdev;
4409
4410                /* configure backlink on ring */
4411                ring->q_vector = q_vector;
4412
4413                /* update q_vector Rx values */
4414                igc_add_ring(ring, &q_vector->rx);
4415
4416                /* apply Rx specific ring traits */
4417                ring->count = adapter->rx_ring_count;
4418                ring->queue_index = rxr_idx;
4419
4420                /* assign ring to adapter */
4421                adapter->rx_ring[rxr_idx] = ring;
4422        }
4423
4424        return 0;
4425}
4426
4427/**
4428 * igc_alloc_q_vectors - Allocate memory for interrupt vectors
4429 * @adapter: board private structure to initialize
4430 *
4431 * We allocate one q_vector per queue interrupt.  If allocation fails we
4432 * return -ENOMEM.
4433 */
4434static int igc_alloc_q_vectors(struct igc_adapter *adapter)
4435{
4436        int rxr_remaining = adapter->num_rx_queues;
4437        int txr_remaining = adapter->num_tx_queues;
4438        int rxr_idx = 0, txr_idx = 0, v_idx = 0;
4439        int q_vectors = adapter->num_q_vectors;
4440        int err;
4441
4442        if (q_vectors >= (rxr_remaining + txr_remaining)) {
4443                for (; rxr_remaining; v_idx++) {
4444                        err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4445                                                 0, 0, 1, rxr_idx);
4446
4447                        if (err)
4448                                goto err_out;
4449
4450                        /* update counts and index */
4451                        rxr_remaining--;
4452                        rxr_idx++;
4453                }
4454        }
4455
4456        for (; v_idx < q_vectors; v_idx++) {
4457                int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
4458                int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
4459
4460                err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4461                                         tqpv, txr_idx, rqpv, rxr_idx);
4462
4463                if (err)
4464                        goto err_out;
4465
4466                /* update counts and index */
4467                rxr_remaining -= rqpv;
4468                txr_remaining -= tqpv;
4469                rxr_idx++;
4470                txr_idx++;
4471        }
4472
4473        return 0;
4474
4475err_out:
4476        adapter->num_tx_queues = 0;
4477        adapter->num_rx_queues = 0;
4478        adapter->num_q_vectors = 0;
4479
4480        while (v_idx--)
4481                igc_free_q_vector(adapter, v_idx);
4482
4483        return -ENOMEM;
4484}
4485
4486/**
4487 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
4488 * @adapter: Pointer to adapter structure
4489 * @msix: boolean for MSI-X capability
4490 *
4491 * This function initializes the interrupts and allocates all of the queues.
4492 */
4493static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
4494{
4495        struct net_device *dev = adapter->netdev;
4496        int err = 0;
4497
4498        igc_set_interrupt_capability(adapter, msix);
4499
4500        err = igc_alloc_q_vectors(adapter);
4501        if (err) {
4502                netdev_err(dev, "Unable to allocate memory for vectors\n");
4503                goto err_alloc_q_vectors;
4504        }
4505
4506        igc_cache_ring_register(adapter);
4507
4508        return 0;
4509
4510err_alloc_q_vectors:
4511        igc_reset_interrupt_capability(adapter);
4512        return err;
4513}
4514
4515/**
4516 * igc_sw_init - Initialize general software structures (struct igc_adapter)
4517 * @adapter: board private structure to initialize
4518 *
4519 * igc_sw_init initializes the Adapter private data structure.
4520 * Fields are initialized based on PCI device information and
4521 * OS network device settings (MTU size).
4522 */
4523static int igc_sw_init(struct igc_adapter *adapter)
4524{
4525        struct net_device *netdev = adapter->netdev;
4526        struct pci_dev *pdev = adapter->pdev;
4527        struct igc_hw *hw = &adapter->hw;
4528
4529        pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
4530
4531        /* set default ring sizes */
4532        adapter->tx_ring_count = IGC_DEFAULT_TXD;
4533        adapter->rx_ring_count = IGC_DEFAULT_RXD;
4534
4535        /* set default ITR values */
4536        adapter->rx_itr_setting = IGC_DEFAULT_ITR;
4537        adapter->tx_itr_setting = IGC_DEFAULT_ITR;
4538
4539        /* set default work limits */
4540        adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
4541
4542        /* adjust max frame to be at least the size of a standard frame */
4543        adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
4544                                VLAN_HLEN;
4545        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
4546
4547        mutex_init(&adapter->nfc_rule_lock);
4548        INIT_LIST_HEAD(&adapter->nfc_rule_list);
4549        adapter->nfc_rule_count = 0;
4550
4551        spin_lock_init(&adapter->stats64_lock);
4552        /* Assume MSI-X interrupts, will be checked during IRQ allocation */
4553        adapter->flags |= IGC_FLAG_HAS_MSIX;
4554
4555        igc_init_queue_configuration(adapter);
4556
4557        /* This call may decrease the number of queues */
4558        if (igc_init_interrupt_scheme(adapter, true)) {
4559                netdev_err(netdev, "Unable to allocate memory for queues\n");
4560                return -ENOMEM;
4561        }
4562
4563        /* Explicitly disable IRQ since the NIC can be in any state. */
4564        igc_irq_disable(adapter);
4565
4566        set_bit(__IGC_DOWN, &adapter->state);
4567
4568        return 0;
4569}
4570
4571/**
4572 * igc_up - Open the interface and prepare it to handle traffic
4573 * @adapter: board private structure
4574 */
4575void igc_up(struct igc_adapter *adapter)
4576{
4577        struct igc_hw *hw = &adapter->hw;
4578        int i = 0;
4579
4580        /* hardware has been reset, we need to reload some things */
4581        igc_configure(adapter);
4582
4583        clear_bit(__IGC_DOWN, &adapter->state);
4584
4585        for (i = 0; i < adapter->num_q_vectors; i++)
4586                napi_enable(&adapter->q_vector[i]->napi);
4587
4588        if (adapter->msix_entries)
4589                igc_configure_msix(adapter);
4590        else
4591                igc_assign_vector(adapter->q_vector[0], 0);
4592
4593        /* Clear any pending interrupts. */
4594        rd32(IGC_ICR);
4595        igc_irq_enable(adapter);
4596
4597        netif_tx_start_all_queues(adapter->netdev);
4598
4599        /* start the watchdog. */
4600        hw->mac.get_link_status = true;
4601        schedule_work(&adapter->watchdog_task);
4602}
4603
4604/**
4605 * igc_update_stats - Update the board statistics counters
4606 * @adapter: board private structure
4607 */
4608void igc_update_stats(struct igc_adapter *adapter)
4609{
4610        struct rtnl_link_stats64 *net_stats = &adapter->stats64;
4611        struct pci_dev *pdev = adapter->pdev;
4612        struct igc_hw *hw = &adapter->hw;
4613        u64 _bytes, _packets;
4614        u64 bytes, packets;
4615        unsigned int start;
4616        u32 mpc;
4617        int i;
4618
4619        /* Prevent stats update while adapter is being reset, or if the pci
4620         * connection is down.
4621         */
4622        if (adapter->link_speed == 0)
4623                return;
4624        if (pci_channel_offline(pdev))
4625                return;
4626
4627        packets = 0;
4628        bytes = 0;
4629
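        /* Aggregate per-ring Rx counters: RQDPC holds the per-queue drop
         * count (cleared after reading on i225 and later), and the byte and
         * packet totals are snapshotted under the ring's u64_stats seqcount,
         * retrying if a writer updated them mid-read.
         */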
4630        rcu_read_lock();
4631        for (i = 0; i < adapter->num_rx_queues; i++) {
4632                struct igc_ring *ring = adapter->rx_ring[i];
4633                u32 rqdpc = rd32(IGC_RQDPC(i));
4634
4635                if (hw->mac.type >= igc_i225)
4636                        wr32(IGC_RQDPC(i), 0);
4637
4638                if (rqdpc) {
4639                        ring->rx_stats.drops += rqdpc;
4640                        net_stats->rx_fifo_errors += rqdpc;
4641                }
4642
4643                do {
4644                        start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
4645                        _bytes = ring->rx_stats.bytes;
4646                        _packets = ring->rx_stats.packets;
4647                } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
4648                bytes += _bytes;
4649                packets += _packets;
4650        }
4651
4652        net_stats->rx_bytes = bytes;
4653        net_stats->rx_packets = packets;
4654
4655        packets = 0;
4656        bytes = 0;
4657        for (i = 0; i < adapter->num_tx_queues; i++) {
4658                struct igc_ring *ring = adapter->tx_ring[i];
4659
4660                do {
4661                        start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
4662                        _bytes = ring->tx_stats.bytes;
4663                        _packets = ring->tx_stats.packets;
4664                } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
4665                bytes += _bytes;
4666                packets += _packets;
4667        }
4668        net_stats->tx_bytes = bytes;
4669        net_stats->tx_packets = packets;
4670        rcu_read_unlock();
4671
4672        /* read stats registers */
4673        adapter->stats.crcerrs += rd32(IGC_CRCERRS);
4674        adapter->stats.gprc += rd32(IGC_GPRC);
4675        adapter->stats.gorc += rd32(IGC_GORCL);
4676        rd32(IGC_GORCH); /* clear GORCL */
4677        adapter->stats.bprc += rd32(IGC_BPRC);
4678        adapter->stats.mprc += rd32(IGC_MPRC);
4679        adapter->stats.roc += rd32(IGC_ROC);
4680
4681        adapter->stats.prc64 += rd32(IGC_PRC64);
4682        adapter->stats.prc127 += rd32(IGC_PRC127);
4683        adapter->stats.prc255 += rd32(IGC_PRC255);
4684        adapter->stats.prc511 += rd32(IGC_PRC511);
4685        adapter->stats.prc1023 += rd32(IGC_PRC1023);
4686        adapter->stats.prc1522 += rd32(IGC_PRC1522);
4687        adapter->stats.tlpic += rd32(IGC_TLPIC);
4688        adapter->stats.rlpic += rd32(IGC_RLPIC);
4689        adapter->stats.hgptc += rd32(IGC_HGPTC);
4690
4691        mpc = rd32(IGC_MPC);
4692        adapter->stats.mpc += mpc;
4693        net_stats->rx_fifo_errors += mpc;
4694        adapter->stats.scc += rd32(IGC_SCC);
4695        adapter->stats.ecol += rd32(IGC_ECOL);
4696        adapter->stats.mcc += rd32(IGC_MCC);
4697        adapter->stats.latecol += rd32(IGC_LATECOL);
4698        adapter->stats.dc += rd32(IGC_DC);
4699        adapter->stats.rlec += rd32(IGC_RLEC);
4700        adapter->stats.xonrxc += rd32(IGC_XONRXC);
4701        adapter->stats.xontxc += rd32(IGC_XONTXC);
4702        adapter->stats.xoffrxc += rd32(IGC_XOFFRXC);
4703        adapter->stats.xofftxc += rd32(IGC_XOFFTXC);
4704        adapter->stats.fcruc += rd32(IGC_FCRUC);
4705        adapter->stats.gptc += rd32(IGC_GPTC);
4706        adapter->stats.gotc += rd32(IGC_GOTCL);
4707        rd32(IGC_GOTCH); /* clear GOTCL */
4708        adapter->stats.rnbc += rd32(IGC_RNBC);
4709        adapter->stats.ruc += rd32(IGC_RUC);
4710        adapter->stats.rfc += rd32(IGC_RFC);
4711        adapter->stats.rjc += rd32(IGC_RJC);
4712        adapter->stats.tor += rd32(IGC_TORH);
4713        adapter->stats.tot += rd32(IGC_TOTH);
4714        adapter->stats.tpr += rd32(IGC_TPR);
4715
4716        adapter->stats.ptc64 += rd32(IGC_PTC64);
4717        adapter->stats.ptc127 += rd32(IGC_PTC127);
4718        adapter->stats.ptc255 += rd32(IGC_PTC255);
4719        adapter->stats.ptc511 += rd32(IGC_PTC511);
4720        adapter->stats.ptc1023 += rd32(IGC_PTC1023);
4721        adapter->stats.ptc1522 += rd32(IGC_PTC1522);
4722
4723        adapter->stats.mptc += rd32(IGC_MPTC);
4724        adapter->stats.bptc += rd32(IGC_BPTC);
4725
4726        adapter->stats.tpt += rd32(IGC_TPT);
4727        adapter->stats.colc += rd32(IGC_COLC);
4728        adapter->stats.colc += rd32(IGC_RERC);
4729
4730        adapter->stats.algnerrc += rd32(IGC_ALGNERRC);
4731
4732        adapter->stats.tsctc += rd32(IGC_TSCTC);
4733
4734        adapter->stats.iac += rd32(IGC_IAC);
4735
4736        /* Fill out the OS statistics structure */
4737        net_stats->multicast = adapter->stats.mprc;
4738        net_stats->collisions = adapter->stats.colc;
4739
4740        /* Rx Errors */
4741
4742        /* RLEC on some newer hardware can be incorrect so build
4743         * our own version based on RUC and ROC
4744         */
4745        net_stats->rx_errors = adapter->stats.rxerrc +
4746                adapter->stats.crcerrs + adapter->stats.algnerrc +
4747                adapter->stats.ruc + adapter->stats.roc +
4748                adapter->stats.cexterr;
4749        net_stats->rx_length_errors = adapter->stats.ruc +
4750                                      adapter->stats.roc;
4751        net_stats->rx_crc_errors = adapter->stats.crcerrs;
4752        net_stats->rx_frame_errors = adapter->stats.algnerrc;
4753        net_stats->rx_missed_errors = adapter->stats.mpc;
4754
4755        /* Tx Errors */
4756        net_stats->tx_errors = adapter->stats.ecol +
4757                               adapter->stats.latecol;
4758        net_stats->tx_aborted_errors = adapter->stats.ecol;
4759        net_stats->tx_window_errors = adapter->stats.latecol;
4760        net_stats->tx_carrier_errors = adapter->stats.tncrs;
4761
4762        /* Tx Dropped needs to be maintained elsewhere */
4763
4764        /* Management Stats */
4765        adapter->stats.mgptc += rd32(IGC_MGTPTC);
4766        adapter->stats.mgprc += rd32(IGC_MGTPRC);
4767        adapter->stats.mgpdc += rd32(IGC_MGTPDC);
4768}
4769
4770/**
4771 * igc_down - Close the interface
4772 * @adapter: board private structure
4773 */
4774void igc_down(struct igc_adapter *adapter)
4775{
4776        struct net_device *netdev = adapter->netdev;
4777        struct igc_hw *hw = &adapter->hw;
4778        u32 tctl, rctl;
4779        int i = 0;
4780
4781        set_bit(__IGC_DOWN, &adapter->state);
4782
4783        igc_ptp_suspend(adapter);
4784
4785        if (pci_device_is_present(adapter->pdev)) {
4786                /* disable receives in the hardware */
4787                rctl = rd32(IGC_RCTL);
4788                wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
4789                /* flush and sleep below */
4790        }
4791        /* set trans_start so we don't get spurious watchdogs during reset */
4792        netif_trans_update(netdev);
4793
4794        netif_carrier_off(netdev);
4795        netif_tx_stop_all_queues(netdev);
4796
4797        if (pci_device_is_present(adapter->pdev)) {
4798                /* disable transmits in the hardware */
4799                tctl = rd32(IGC_TCTL);
4800                tctl &= ~IGC_TCTL_EN;
4801                wr32(IGC_TCTL, tctl);
4802                /* flush both disables and wait for them to finish */
4803                wrfl();
4804                usleep_range(10000, 20000);
4805
4806                igc_irq_disable(adapter);
4807        }
4808
4809        adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
4810
4811        for (i = 0; i < adapter->num_q_vectors; i++) {
4812                if (adapter->q_vector[i]) {
4813                        napi_synchronize(&adapter->q_vector[i]->napi);
4814                        napi_disable(&adapter->q_vector[i]->napi);
4815                }
4816        }
4817
4818        del_timer_sync(&adapter->watchdog_timer);
4819        del_timer_sync(&adapter->phy_info_timer);
4820
4821        /* record the stats before reset */
4822        spin_lock(&adapter->stats64_lock);
4823        igc_update_stats(adapter);
4824        spin_unlock(&adapter->stats64_lock);
4825
4826        adapter->link_speed = 0;
4827        adapter->link_duplex = 0;
4828
4829        if (!pci_channel_offline(adapter->pdev))
4830                igc_reset(adapter);
4831
4832        /* clear VLAN promisc flag so VFTA will be updated if necessary */
4833        adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
4834
4835        igc_clean_all_tx_rings(adapter);
4836        igc_clean_all_rx_rings(adapter);
4837}
4838
4839void igc_reinit_locked(struct igc_adapter *adapter)
4840{
4841        while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
4842                usleep_range(1000, 2000);
4843        igc_down(adapter);
4844        igc_up(adapter);
4845        clear_bit(__IGC_RESETTING, &adapter->state);
4846}
4847
4848static void igc_reset_task(struct work_struct *work)
4849{
4850        struct igc_adapter *adapter;
4851
4852        adapter = container_of(work, struct igc_adapter, reset_task);
4853
4854        rtnl_lock();
4855        /* If we're already down or resetting, just bail */
4856        if (test_bit(__IGC_DOWN, &adapter->state) ||
4857            test_bit(__IGC_RESETTING, &adapter->state)) {
4858                rtnl_unlock();
4859                return;
4860        }
4861
4862        igc_rings_dump(adapter);
4863        igc_regs_dump(adapter);
4864        netdev_err(adapter->netdev, "Reset adapter\n");
4865        igc_reinit_locked(adapter);
4866        rtnl_unlock();
4867}
4868
4869/**
4870 * igc_change_mtu - Change the Maximum Transfer Unit
4871 * @netdev: network interface device structure
4872 * @new_mtu: new value for maximum frame size
4873 *
4874 * Returns 0 on success, negative on failure
4875 */
4876static int igc_change_mtu(struct net_device *netdev, int new_mtu)
4877{
4878        int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4879        struct igc_adapter *adapter = netdev_priv(netdev);
4880
4881        if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
4882                netdev_dbg(netdev, "Jumbo frames not supported with XDP");
4883                return -EINVAL;
4884        }
4885
4886        /* adjust max frame to be at least the size of a standard frame */
4887        if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
4888                max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
4889
4890        while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
4891                usleep_range(1000, 2000);
4892
4893        /* igc_down has a dependency on max_frame_size */
4894        adapter->max_frame_size = max_frame;
4895
4896        if (netif_running(netdev))
4897                igc_down(adapter);
4898
4899        netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
4900        netdev->mtu = new_mtu;
4901
4902        if (netif_running(netdev))
4903                igc_up(adapter);
4904        else
4905                igc_reset(adapter);
4906
4907        clear_bit(__IGC_RESETTING, &adapter->state);
4908
4909        return 0;
4910}
4911
4912/**
4913 * igc_get_stats64 - Get System Network Statistics
4914 * @netdev: network interface device structure
4915 * @stats: rtnl_link_stats64 pointer
4916 *
4917 * Returns the address of the device statistics structure.
4918 * The statistics are updated here and also from the timer callback.
4919 */
4920static void igc_get_stats64(struct net_device *netdev,
4921                            struct rtnl_link_stats64 *stats)
4922{
4923        struct igc_adapter *adapter = netdev_priv(netdev);
4924
4925        spin_lock(&adapter->stats64_lock);
4926        if (!test_bit(__IGC_RESETTING, &adapter->state))
4927                igc_update_stats(adapter);
4928        memcpy(stats, &adapter->stats64, sizeof(*stats));
4929        spin_unlock(&adapter->stats64_lock);
4930}
4931
4932static netdev_features_t igc_fix_features(struct net_device *netdev,
4933                                          netdev_features_t features)
4934{
4935        /* Since there is no support for separate Rx/Tx vlan accel
4936         * enable/disable make sure Tx flag is always in same state as Rx.
4937         */
4938        if (features & NETIF_F_HW_VLAN_CTAG_RX)
4939                features |= NETIF_F_HW_VLAN_CTAG_TX;
4940        else
4941                features &= ~NETIF_F_HW_VLAN_CTAG_TX;
4942
4943        return features;
4944}
4945
4946static int igc_set_features(struct net_device *netdev,
4947                            netdev_features_t features)
4948{
4949        netdev_features_t changed = netdev->features ^ features;
4950        struct igc_adapter *adapter = netdev_priv(netdev);
4951
4952        if (changed & NETIF_F_HW_VLAN_CTAG_RX)
4953                igc_vlan_mode(netdev, features);
4954
4955        /* Only RXALL and NTUPLE changes require the extra handling below */
4956        if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
4957                return 0;
4958
4959        if (!(features & NETIF_F_NTUPLE))
4960                igc_flush_nfc_rules(adapter);
4961
4962        netdev->features = features;
4963
4964        if (netif_running(netdev))
4965                igc_reinit_locked(adapter);
4966        else
4967                igc_reset(adapter);
4968
4969        return 1;
4970}
4971
4972static netdev_features_t
4973igc_features_check(struct sk_buff *skb, struct net_device *dev,
4974                   netdev_features_t features)
4975{
4976        unsigned int network_hdr_len, mac_hdr_len;
4977
4978        /* Make certain the headers can be described by a context descriptor */
4979        mac_hdr_len = skb_network_header(skb) - skb->data;
4980        if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN))
4981                return features & ~(NETIF_F_HW_CSUM |
4982                                    NETIF_F_SCTP_CRC |
4983                                    NETIF_F_HW_VLAN_CTAG_TX |
4984                                    NETIF_F_TSO |
4985                                    NETIF_F_TSO6);
4986
4987        network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
4988        if (unlikely(network_hdr_len >  IGC_MAX_NETWORK_HDR_LEN))
4989                return features & ~(NETIF_F_HW_CSUM |
4990                                    NETIF_F_SCTP_CRC |
4991                                    NETIF_F_TSO |
4992                                    NETIF_F_TSO6);
4993
4994        /* We can only support IPv4 TSO in tunnels if we can mangle the
4995         * inner IP ID field, so strip TSO if MANGLEID is not supported.
4996         */
4997        if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
4998                features &= ~NETIF_F_TSO;
4999
5000        return features;
5001}
5002
5003static void igc_tsync_interrupt(struct igc_adapter *adapter)
5004{
5005        u32 ack, tsauxc, sec, nsec, tsicr;
5006        struct igc_hw *hw = &adapter->hw;
5007        struct ptp_clock_event event;
5008        struct timespec64 ts;
5009
5010        tsicr = rd32(IGC_TSICR);
5011        ack = 0;
5012
5013        if (tsicr & IGC_TSICR_SYS_WRAP) {
5014                event.type = PTP_CLOCK_PPS;
5015                if (adapter->ptp_caps.pps)
5016                        ptp_clock_event(adapter->ptp_clock, &event);
5017                ack |= IGC_TSICR_SYS_WRAP;
5018        }
5019
5020        if (tsicr & IGC_TSICR_TXTS) {
5021                /* retrieve hardware timestamp */
5022                schedule_work(&adapter->ptp_tx_work);
5023                ack |= IGC_TSICR_TXTS;
5024        }
5025
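        /* Target time (TT0/TT1) interrupts: advance the periodic output
         * start time by one period and re-arm the target-time registers so
         * the periodic output continues.
         */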
5026        if (tsicr & IGC_TSICR_TT0) {
5027                spin_lock(&adapter->tmreg_lock);
5028                ts = timespec64_add(adapter->perout[0].start,
5029                                    adapter->perout[0].period);
5030                wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
5031                wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec);
5032                tsauxc = rd32(IGC_TSAUXC);
5033                tsauxc |= IGC_TSAUXC_EN_TT0;
5034                wr32(IGC_TSAUXC, tsauxc);
5035                adapter->perout[0].start = ts;
5036                spin_unlock(&adapter->tmreg_lock);
5037                ack |= IGC_TSICR_TT0;
5038        }
5039
5040        if (tsicr & IGC_TSICR_TT1) {
5041                spin_lock(&adapter->tmreg_lock);
5042                ts = timespec64_add(adapter->perout[1].start,
5043                                    adapter->perout[1].period);
5044                wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
5045                wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec);
5046                tsauxc = rd32(IGC_TSAUXC);
5047                tsauxc |= IGC_TSAUXC_EN_TT1;
5048                wr32(IGC_TSAUXC, tsauxc);
5049                adapter->perout[1].start = ts;
5050                spin_unlock(&adapter->tmreg_lock);
5051                ack |= IGC_TSICR_TT1;
5052        }
5053
5054        if (tsicr & IGC_TSICR_AUTT0) {
5055                nsec = rd32(IGC_AUXSTMPL0);
5056                sec  = rd32(IGC_AUXSTMPH0);
5057                event.type = PTP_CLOCK_EXTTS;
5058                event.index = 0;
5059                event.timestamp = sec * NSEC_PER_SEC + nsec;
5060                ptp_clock_event(adapter->ptp_clock, &event);
5061                ack |= IGC_TSICR_AUTT0;
5062        }
5063
5064        if (tsicr & IGC_TSICR_AUTT1) {
5065                nsec = rd32(IGC_AUXSTMPL1);
5066                sec  = rd32(IGC_AUXSTMPH1);
5067                event.type = PTP_CLOCK_EXTTS;
5068                event.index = 1;
5069                event.timestamp = sec * NSEC_PER_SEC + nsec;
5070                ptp_clock_event(adapter->ptp_clock, &event);
5071                ack |= IGC_TSICR_AUTT1;
5072        }
5073
5074        /* acknowledge the interrupts */
5075        wr32(IGC_TSICR, ack);
5076}
5077
5078/**
5079 * igc_msix_other - msix other interrupt handler
5080 * @irq: interrupt number
5081 * @data: pointer to adapter structure
5082 */
5083static irqreturn_t igc_msix_other(int irq, void *data)
5084{
5085        struct igc_adapter *adapter = data;
5086        struct igc_hw *hw = &adapter->hw;
5087        u32 icr = rd32(IGC_ICR);
5088
5089        /* reading ICR causes bit 31 of EICR to be cleared */
5090        if (icr & IGC_ICR_DRSTA)
5091                schedule_work(&adapter->reset_task);
5092
5093        if (icr & IGC_ICR_DOUTSYNC) {
5094                /* HW is reporting DMA is out of sync */
5095                adapter->stats.doosync++;
5096        }
5097
5098        if (icr & IGC_ICR_LSC) {
5099                hw->mac.get_link_status = true;
5100                /* guard against interrupt when we're going down */
5101                if (!test_bit(__IGC_DOWN, &adapter->state))
5102                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5103        }
5104
5105        if (icr & IGC_ICR_TS)
5106                igc_tsync_interrupt(adapter);
5107
5108        wr32(IGC_EIMS, adapter->eims_other);
5109
5110        return IRQ_HANDLED;
5111}
5112
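/* Write the ITR value computed during the previous interrupt into the
 * vector's EITR register, but only if a new value is pending (set_itr).
 */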
5113static void igc_write_itr(struct igc_q_vector *q_vector)
5114{
5115        u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
5116
5117        if (!q_vector->set_itr)
5118                return;
5119
5120        if (!itr_val)
5121                itr_val = IGC_ITR_VAL_MASK;
5122
5123        itr_val |= IGC_EITR_CNT_IGNR;
5124
5125        writel(itr_val, q_vector->itr_register);
5126        q_vector->set_itr = 0;
5127}
5128
5129static irqreturn_t igc_msix_ring(int irq, void *data)
5130{
5131        struct igc_q_vector *q_vector = data;
5132
5133        /* Write the ITR value calculated from the previous interrupt. */
5134        igc_write_itr(q_vector);
5135
5136        napi_schedule(&q_vector->napi);
5137
5138        return IRQ_HANDLED;
5139}
5140
5141/**
5142 * igc_request_msix - Initialize MSI-X interrupts
5143 * @adapter: Pointer to adapter structure
5144 *
5145 * igc_request_msix requests interrupts from the kernel for the
5146 * previously allocated MSI-X vectors.
5147 */
5148static int igc_request_msix(struct igc_adapter *adapter)
5149{
5150        unsigned int num_q_vectors = adapter->num_q_vectors;
5151        int i = 0, err = 0, vector = 0, free_vector = 0;
5152        struct net_device *netdev = adapter->netdev;
5153
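        /* Vector 0 handles the "other" causes (link changes, resets and
         * timesync events); each queue vector then gets its own MSI-X
         * vector below.
         */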
5154        err = request_irq(adapter->msix_entries[vector].vector,
5155                          &igc_msix_other, 0, netdev->name, adapter);
5156        if (err)
5157                goto err_out;
5158
5159        if (num_q_vectors > MAX_Q_VECTORS) {
5160                num_q_vectors = MAX_Q_VECTORS;
5161                dev_warn(&adapter->pdev->dev,
5162                         "The number of queue vectors (%d) is higher than max allowed (%d)\n",
5163                         adapter->num_q_vectors, MAX_Q_VECTORS);
5164        }
5165        for (i = 0; i < num_q_vectors; i++) {
5166                struct igc_q_vector *q_vector = adapter->q_vector[i];
5167
5168                vector++;
5169
5170                q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
5171
5172                if (q_vector->rx.ring && q_vector->tx.ring)
5173                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
5174                                q_vector->rx.ring->queue_index);
5175                else if (q_vector->tx.ring)
5176                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
5177                                q_vector->tx.ring->queue_index);
5178                else if (q_vector->rx.ring)
5179                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
5180                                q_vector->rx.ring->queue_index);
5181                else
5182                        sprintf(q_vector->name, "%s-unused", netdev->name);
5183
5184                err = request_irq(adapter->msix_entries[vector].vector,
5185                                  igc_msix_ring, 0, q_vector->name,
5186                                  q_vector);
5187                if (err)
5188                        goto err_free;
5189        }
5190
5191        igc_configure_msix(adapter);
5192        return 0;
5193
5194err_free:
5195        /* free already assigned IRQs */
5196        free_irq(adapter->msix_entries[free_vector++].vector, adapter);
5197
5198        vector--;
5199        for (i = 0; i < vector; i++) {
5200                free_irq(adapter->msix_entries[free_vector++].vector,
5201                         adapter->q_vector[i]);
5202        }
5203err_out:
5204        return err;
5205}
5206
5207/**
5208 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
5209 * @adapter: Pointer to adapter structure
5210 *
5211 * This function resets the device so that it has no Rx queues, Tx queues,
5212 * or MSI-X interrupts allocated.
5213 */
5214static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
5215{
5216        igc_free_q_vectors(adapter);
5217        igc_reset_interrupt_capability(adapter);
5218}
5219
5220/* Need to wait a few seconds after link up to get diagnostic information from
5221 * the phy
5222 */
5223static void igc_update_phy_info(struct timer_list *t)
5224{
5225        struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
5226
5227        igc_get_phy_info(&adapter->hw);
5228}
5229
5230/**
5231 * igc_has_link - check shared code for link and determine up/down
5232 * @adapter: pointer to driver private info
5233 */
5234bool igc_has_link(struct igc_adapter *adapter)
5235{
5236        struct igc_hw *hw = &adapter->hw;
5237        bool link_active = false;
5238
5239        /* get_link_status is set on an LSC (link status change) or Rx
5240         * sequence error interrupt and remains set until
5241         * igc_check_for_link confirms that link is up again
5242         * (copper adapters only).
5243         */
5244        if (!hw->mac.get_link_status)
5245                return true;
5246        hw->mac.ops.check_for_link(hw);
5247        link_active = !hw->mac.get_link_status;
5248
5249        if (hw->mac.type == igc_i225) {
5250                if (!netif_carrier_ok(adapter->netdev)) {
5251                        adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5252                } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
5253                        adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
5254                        adapter->link_check_timeout = jiffies;
5255                }
5256        }
5257
5258        return link_active;
5259}
5260
5261/**
5262 * igc_watchdog - Timer Call-back
5263 * @t: timer for the watchdog
5264 */
5265static void igc_watchdog(struct timer_list *t)
5266{
5267        struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
5268        /* Do the rest outside of interrupt context */
5269        schedule_work(&adapter->watchdog_task);
5270}
5271
5272static void igc_watchdog_task(struct work_struct *work)
5273{
5274        struct igc_adapter *adapter = container_of(work,
5275                                                   struct igc_adapter,
5276                                                   watchdog_task);
5277        struct net_device *netdev = adapter->netdev;
5278        struct igc_hw *hw = &adapter->hw;
5279        struct igc_phy_info *phy = &hw->phy;
5280        u16 phy_data, retry_count = 20;
5281        u32 link;
5282        int i;
5283
5284        link = igc_has_link(adapter);
5285
5286        if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
5287                if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
5288                        adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5289                else
5290                        link = false;
5291        }
5292
5293        if (link) {
5294                /* Cancel scheduled suspend requests. */
5295                pm_runtime_resume(netdev->dev.parent);
5296
5297                if (!netif_carrier_ok(netdev)) {
5298                        u32 ctrl;
5299
5300                        hw->mac.ops.get_speed_and_duplex(hw,
5301                                                         &adapter->link_speed,
5302                                                         &adapter->link_duplex);
5303
5304                        ctrl = rd32(IGC_CTRL);
5305                        /* Link status message must follow this format */
5306                        netdev_info(netdev,
5307                                    "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
5308                                    adapter->link_speed,
5309                                    adapter->link_duplex == FULL_DUPLEX ?
5310                                    "Full" : "Half",
5311                                    (ctrl & IGC_CTRL_TFCE) &&
5312                                    (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
5313                                    (ctrl & IGC_CTRL_RFCE) ?  "RX" :
5314                                    (ctrl & IGC_CTRL_TFCE) ?  "TX" : "None");
5315
5316                        /* disable EEE if enabled */
5317                        if ((adapter->flags & IGC_FLAG_EEE) &&
5318                            adapter->link_duplex == HALF_DUPLEX) {
5319                                netdev_info(netdev,
5320                                            "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n");
5321                                adapter->hw.dev_spec._base.eee_enable = false;
5322                                adapter->flags &= ~IGC_FLAG_EEE;
5323                        }
5324
5325                        /* check if SmartSpeed worked */
5326                        igc_check_downshift(hw);
5327                        if (phy->speed_downgraded)
5328                                netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
5329
5330                        /* adjust timeout factor according to speed/duplex */
5331                        adapter->tx_timeout_factor = 1;
5332                        switch (adapter->link_speed) {
5333                        case SPEED_10:
5334                                adapter->tx_timeout_factor = 14;
5335                                break;
5336                        case SPEED_100:
5337                        case SPEED_1000:
5338                        case SPEED_2500:
5339                                adapter->tx_timeout_factor = 7;
5340                                break;
5341                        }
5342
5343                        if (adapter->link_speed != SPEED_1000)
5344                                goto no_wait;
5345
5346                        /* wait for Remote receiver status OK */
5347retry_read_status:
5348                        if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
5349                                              &phy_data)) {
5350                                if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
5351                                    retry_count) {
5352                                        msleep(100);
5353                                        retry_count--;
5354                                        goto retry_read_status;
5355                                } else if (!retry_count) {
5356                                        netdev_err(netdev, "exceeded max 2 seconds\n");
5357                                }
5358                        } else {
5359                                netdev_err(netdev, "Failed to read 1000Base-T Status Reg\n");
5360                        }
5361no_wait:
5362                        netif_carrier_on(netdev);
5363
5364                        /* link state has changed, schedule phy info update */
5365                        if (!test_bit(__IGC_DOWN, &adapter->state))
5366                                mod_timer(&adapter->phy_info_timer,
5367                                          round_jiffies(jiffies + 2 * HZ));
5368                }
5369        } else {
5370                if (netif_carrier_ok(netdev)) {
5371                        adapter->link_speed = 0;
5372                        adapter->link_duplex = 0;
5373
5374                        /* Link status message must follow this format */
5375                        netdev_info(netdev, "NIC Link is Down\n");
5376                        netif_carrier_off(netdev);
5377
5378                        /* link state has changed, schedule phy info update */
5379                        if (!test_bit(__IGC_DOWN, &adapter->state))
5380                                mod_timer(&adapter->phy_info_timer,
5381                                          round_jiffies(jiffies + 2 * HZ));
5382
5383                        /* link is down, time to check for alternate media */
5384                        if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
5385                                if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
5386                                        schedule_work(&adapter->reset_task);
5387                                        /* return immediately */
5388                                        return;
5389                                }
5390                        }
5391                        pm_schedule_suspend(netdev->dev.parent,
5392                                            MSEC_PER_SEC * 5);
5393
5394                /* also check for alternate media here */
5395                } else if (!netif_carrier_ok(netdev) &&
5396                           (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
5397                        if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
5398                                schedule_work(&adapter->reset_task);
5399                                /* return immediately */
5400                                return;
5401                        }
5402                }
5403        }
5404
5405        spin_lock(&adapter->stats64_lock);
5406        igc_update_stats(adapter);
5407        spin_unlock(&adapter->stats64_lock);
5408
5409        for (i = 0; i < adapter->num_tx_queues; i++) {
5410                struct igc_ring *tx_ring = adapter->tx_ring[i];
5411
5412                if (!netif_carrier_ok(netdev)) {
5413                        /* We've lost link, so the controller stops DMA,
5414                         * but we've got queued Tx work that's never going
5415                         * to get done, so reset controller to flush Tx.
5416                         * (Do the reset outside of interrupt context).
5417                         */
5418                        if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
5419                                adapter->tx_timeout_count++;
5420                                schedule_work(&adapter->reset_task);
5421                                /* return immediately since reset is imminent */
5422                                return;
5423                        }
5424                }
5425
5426                /* Force detection of hung controller every watchdog period */
5427                set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5428        }
5429
5430        /* Cause software interrupt to ensure Rx ring is cleaned */
5431        if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5432                u32 eics = 0;
5433
5434                for (i = 0; i < adapter->num_q_vectors; i++)
5435                        eics |= adapter->q_vector[i]->eims_value;
5436                wr32(IGC_EICS, eics);
5437        } else {
5438                wr32(IGC_ICS, IGC_ICS_RXDMT0);
5439        }
5440
5441        igc_ptp_tx_hang(adapter);
5442
5443        /* Reset the timer */
5444        if (!test_bit(__IGC_DOWN, &adapter->state)) {
5445                if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
5446                        mod_timer(&adapter->watchdog_timer,
5447                                  round_jiffies(jiffies +  HZ));
5448                else
5449                        mod_timer(&adapter->watchdog_timer,
5450                                  round_jiffies(jiffies + 2 * HZ));
5451        }
5452}
5453
5454/**
5455 * igc_intr_msi - Interrupt Handler
5456 * @irq: interrupt number
5457 * @data: pointer to adapter structure
5458 */
5459static irqreturn_t igc_intr_msi(int irq, void *data)
5460{
5461        struct igc_adapter *adapter = data;
5462        struct igc_q_vector *q_vector = adapter->q_vector[0];
5463        struct igc_hw *hw = &adapter->hw;
5464        /* read ICR disables interrupts using IAM */
5465        u32 icr = rd32(IGC_ICR);
5466
5467        igc_write_itr(q_vector);
5468
5469        if (icr & IGC_ICR_DRSTA)
5470                schedule_work(&adapter->reset_task);
5471
5472        if (icr & IGC_ICR_DOUTSYNC) {
5473                /* HW is reporting DMA is out of sync */
5474                adapter->stats.doosync++;
5475        }
5476
5477        if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5478                hw->mac.get_link_status = true;
5479                if (!test_bit(__IGC_DOWN, &adapter->state))
5480                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5481        }
5482
5483        if (icr & IGC_ICR_TS)
5484                igc_tsync_interrupt(adapter);
5485
5486        napi_schedule(&q_vector->napi);
5487
5488        return IRQ_HANDLED;
5489}
5490
5491/**
5492 * igc_intr - Legacy Interrupt Handler
5493 * @irq: interrupt number
5494 * @data: pointer to adapter structure
5495 */
5496static irqreturn_t igc_intr(int irq, void *data)
5497{
5498        struct igc_adapter *adapter = data;
5499        struct igc_q_vector *q_vector = adapter->q_vector[0];
5500        struct igc_hw *hw = &adapter->hw;
5501        /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5502         * need for the IMC write
5503         */
5504        u32 icr = rd32(IGC_ICR);
5505
5506        /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5507         * not set, then the adapter didn't send an interrupt
5508         */
5509        if (!(icr & IGC_ICR_INT_ASSERTED))
5510                return IRQ_NONE;
5511
5512        igc_write_itr(q_vector);
5513
5514        if (icr & IGC_ICR_DRSTA)
5515                schedule_work(&adapter->reset_task);
5516
5517        if (icr & IGC_ICR_DOUTSYNC) {
5518                /* HW is reporting DMA is out of sync */
5519                adapter->stats.doosync++;
5520        }
5521
5522        if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5523                hw->mac.get_link_status = true;
5524                /* guard against interrupt when we're going down */
5525                if (!test_bit(__IGC_DOWN, &adapter->state))
5526                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5527        }
5528
5529        if (icr & IGC_ICR_TS)
5530                igc_tsync_interrupt(adapter);
5531
5532        napi_schedule(&q_vector->napi);
5533
5534        return IRQ_HANDLED;
5535}
5536
5537static void igc_free_irq(struct igc_adapter *adapter)
5538{
5539        if (adapter->msix_entries) {
5540                int vector = 0, i;
5541
5542                free_irq(adapter->msix_entries[vector++].vector, adapter);
5543
5544                for (i = 0; i < adapter->num_q_vectors; i++)
5545                        free_irq(adapter->msix_entries[vector++].vector,
5546                                 adapter->q_vector[i]);
5547        } else {
5548                free_irq(adapter->pdev->irq, adapter);
5549        }
5550}
5551
5552/**
5553 * igc_request_irq - initialize interrupts
5554 * @adapter: Pointer to adapter structure
5555 *
5556 * Attempts to configure interrupts using the best available
5557 * capabilities of the hardware and kernel.
5558 */
5559static int igc_request_irq(struct igc_adapter *adapter)
5560{
5561        struct net_device *netdev = adapter->netdev;
5562        struct pci_dev *pdev = adapter->pdev;
5563        int err = 0;
5564
5565        if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5566                err = igc_request_msix(adapter);
5567                if (!err)
5568                        goto request_done;
5569                /* fall back to MSI */
5570                igc_free_all_tx_resources(adapter);
5571                igc_free_all_rx_resources(adapter);
5572
5573                igc_clear_interrupt_scheme(adapter);
5574                err = igc_init_interrupt_scheme(adapter, false);
5575                if (err)
5576                        goto request_done;
5577                igc_setup_all_tx_resources(adapter);
5578                igc_setup_all_rx_resources(adapter);
5579                igc_configure(adapter);
5580        }
5581
5582        igc_assign_vector(adapter->q_vector[0], 0);
5583
5584        if (adapter->flags & IGC_FLAG_HAS_MSI) {
5585                err = request_irq(pdev->irq, &igc_intr_msi, 0,
5586                                  netdev->name, adapter);
5587                if (!err)
5588                        goto request_done;
5589
5590                /* fall back to legacy interrupts */
5591                igc_reset_interrupt_capability(adapter);
5592                adapter->flags &= ~IGC_FLAG_HAS_MSI;
5593        }
5594
5595        err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
5596                          netdev->name, adapter);
5597
5598        if (err)
5599                netdev_err(netdev, "Error %d getting interrupt\n", err);
5600
5601request_done:
5602        return err;
5603}
5604
5605/**
5606 * __igc_open - Called when a network interface is made active
5607 * @netdev: network interface device structure
5608 * @resuming: boolean indicating if the device is resuming
5609 *
5610 * Returns 0 on success, negative value on failure
5611 *
5612 * The open entry point is called when a network interface is made
5613 * active by the system (IFF_UP).  At this point all resources needed
5614 * for transmit and receive operations are allocated, the interrupt
5615 * handler is registered with the OS, the watchdog timer is started,
5616 * and the stack is notified that the interface is ready.
5617 */
5618static int __igc_open(struct net_device *netdev, bool resuming)
5619{
5620        struct igc_adapter *adapter = netdev_priv(netdev);
5621        struct pci_dev *pdev = adapter->pdev;
5622        struct igc_hw *hw = &adapter->hw;
5623        int err = 0;
5624        int i = 0;
5625
5626        /* disallow open during test */
5627
5628        if (test_bit(__IGC_TESTING, &adapter->state)) {
5629                WARN_ON(resuming);
5630                return -EBUSY;
5631        }
5632
5633        if (!resuming)
5634                pm_runtime_get_sync(&pdev->dev);
5635
5636        netif_carrier_off(netdev);
5637
5638        /* allocate transmit descriptors */
5639        err = igc_setup_all_tx_resources(adapter);
5640        if (err)
5641                goto err_setup_tx;
5642
5643        /* allocate receive descriptors */
5644        err = igc_setup_all_rx_resources(adapter);
5645        if (err)
5646                goto err_setup_rx;
5647
5648        igc_power_up_link(adapter);
5649
5650        igc_configure(adapter);
5651
5652        err = igc_request_irq(adapter);
5653        if (err)
5654                goto err_req_irq;
5655
5656        /* Notify the stack of the actual queue counts. */
5657        err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
5658        if (err)
5659                goto err_set_queues;
5660
5661        err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
5662        if (err)
5663                goto err_set_queues;
5664
5665        clear_bit(__IGC_DOWN, &adapter->state);
5666
5667        for (i = 0; i < adapter->num_q_vectors; i++)
5668                napi_enable(&adapter->q_vector[i]->napi);
5669
5670        /* Clear any pending interrupts. */
5671        rd32(IGC_ICR);
5672        igc_irq_enable(adapter);
5673
5674        if (!resuming)
5675                pm_runtime_put(&pdev->dev);
5676
5677        netif_tx_start_all_queues(netdev);
5678
5679        /* start the watchdog. */
5680        hw->mac.get_link_status = true;
5681        schedule_work(&adapter->watchdog_task);
5682
5683        return IGC_SUCCESS;
5684
5685err_set_queues:
5686        igc_free_irq(adapter);
5687err_req_irq:
5688        igc_release_hw_control(adapter);
5689        igc_power_down_phy_copper_base(&adapter->hw);
5690        igc_free_all_rx_resources(adapter);
5691err_setup_rx:
5692        igc_free_all_tx_resources(adapter);
5693err_setup_tx:
5694        igc_reset(adapter);
5695        if (!resuming)
5696                pm_runtime_put(&pdev->dev);
5697
5698        return err;
5699}
5700
5701int igc_open(struct net_device *netdev)
5702{
5703        return __igc_open(netdev, false);
5704}
5705
5706/**
5707 * __igc_close - Disables a network interface
5708 * @netdev: network interface device structure
5709 * @suspending: boolean indicating the device is suspending
5710 *
5711 * Returns 0, this is not allowed to fail
5712 *
5713 * The close entry point is called when an interface is de-activated
5714 * by the OS.  The hardware is still under the driver's control, but
5715 * needs to be disabled.  A global MAC reset is issued to stop the
5716 * hardware, and all transmit and receive resources are freed.
5717 */
5718static int __igc_close(struct net_device *netdev, bool suspending)
5719{
5720        struct igc_adapter *adapter = netdev_priv(netdev);
5721        struct pci_dev *pdev = adapter->pdev;
5722
5723        WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
5724
5725        if (!suspending)
5726                pm_runtime_get_sync(&pdev->dev);
5727
5728        igc_down(adapter);
5729
5730        igc_release_hw_control(adapter);
5731
5732        igc_free_irq(adapter);
5733
5734        igc_free_all_tx_resources(adapter);
5735        igc_free_all_rx_resources(adapter);
5736
5737        if (!suspending)
5738                pm_runtime_put_sync(&pdev->dev);
5739
5740        return 0;
5741}
5742
5743int igc_close(struct net_device *netdev)
5744{
5745        if (netif_device_present(netdev) || netdev->dismantle)
5746                return __igc_close(netdev, false);
5747        return 0;
5748}
5749
5750/**
5751 * igc_ioctl - Access the hwtstamp interface
5752 * @netdev: network interface device structure
5753 * @ifr: interface request data
5754 * @cmd: ioctl command
5755 **/
5756static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5757{
5758        switch (cmd) {
5759        case SIOCGHWTSTAMP:
5760                return igc_ptp_get_ts_config(netdev, ifr);
5761        case SIOCSHWTSTAMP:
5762                return igc_ptp_set_ts_config(netdev, ifr);
5763        default:
5764                return -EOPNOTSUPP;
5765        }
5766}
5767
5768static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
5769                                      bool enable)
5770{
5771        struct igc_ring *ring;
5772
5773        if (queue < 0 || queue >= adapter->num_tx_queues)
5774                return -EINVAL;
5775
5776        ring = adapter->tx_ring[queue];
5777        ring->launchtime_enable = enable;
5778
5779        return 0;
5780}
5781
5782static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now)
5783{
5784        struct timespec64 b;
5785
5786        b = ktime_to_timespec64(base_time);
5787
5788        return timespec64_compare(now, &b) > 0;
5789}
5790
5791static bool validate_schedule(struct igc_adapter *adapter,
5792                              const struct tc_taprio_qopt_offload *qopt)
5793{
5794        int queue_uses[IGC_MAX_TX_QUEUES] = { };
5795        struct timespec64 now;
5796        size_t n;
5797
5798        if (qopt->cycle_time_extension)
5799                return false;
5800
5801        igc_ptp_read(adapter, &now);
5802
5803        /* If we program the controller's BASET registers with a time
5804         * in the future, it will hold all the packets until that
5805         * time, causing a lot of TX Hangs, so to avoid that, we
5806         * reject schedules that would start in the future.
5807         */
5808        if (!is_base_time_past(qopt->base_time, &now))
5809                return false;
5810
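        /* A Tx queue may have its gate open in at most one entry of the
         * cycle, since only a single start/end window is tracked per ring
         * (see igc_save_qbv_schedule below).
         */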
5811        for (n = 0; n < qopt->num_entries; n++) {
5812                const struct tc_taprio_sched_entry *e;
5813                int i;
5814
5815                e = &qopt->entries[n];
5816
5817                /* i225 only supports "global" frame preemption
5818                 * settings.
5819                 */
5820                if (e->command != TC_TAPRIO_CMD_SET_GATES)
5821                        return false;
5822
5823                for (i = 0; i < adapter->num_tx_queues; i++) {
5824                        if (e->gate_mask & BIT(i))
5825                                queue_uses[i]++;
5826
5827                        if (queue_uses[i] > 1)
5828                                return false;
5829                }
5830        }
5831
5832        return true;
5833}
5834
5835static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
5836                                     struct tc_etf_qopt_offload *qopt)
5837{
5838        struct igc_hw *hw = &adapter->hw;
5839        int err;
5840
5841        if (hw->mac.type != igc_i225)
5842                return -EOPNOTSUPP;
5843
5844        err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable);
5845        if (err)
5846                return err;
5847
5848        return igc_tsn_offload_apply(adapter);
5849}
5850
5851static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
5852{
5853        int i;
5854
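        /* Revert to defaults: no base time and every queue's window open for
         * the full one-second cycle, which effectively disables Qbv gating.
         */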
5855        adapter->base_time = 0;
5856        adapter->cycle_time = NSEC_PER_SEC;
5857
5858        for (i = 0; i < adapter->num_tx_queues; i++) {
5859                struct igc_ring *ring = adapter->tx_ring[i];
5860
5861                ring->start_time = 0;
5862                ring->end_time = NSEC_PER_SEC;
5863        }
5864
5865        return 0;
5866}
5867
5868static int igc_save_qbv_schedule(struct igc_adapter *adapter,
5869                                 struct tc_taprio_qopt_offload *qopt)
5870{
5871        u32 start_time = 0, end_time = 0;
5872        size_t n;
5873
5874        if (!qopt->enable)
5875                return igc_tsn_clear_schedule(adapter);
5876
5877        if (adapter->base_time)
5878                return -EALREADY;
5879
5880        if (!validate_schedule(adapter, qopt))
5881                return -EINVAL;
5882
5883        adapter->cycle_time = qopt->cycle_time;
5884        adapter->base_time = qopt->base_time;
5885
5886        /* FIXME: be a little smarter about cases when the gate for a
5887         * queue stays open for more than one entry.
5888         */
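        /* Translate each gate entry into per-ring transmission windows: a
         * queue whose gate is open in entry n may transmit between the
         * cumulative offset at which entry n starts and the offset at which
         * it ends.
         */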
5889        for (n = 0; n < qopt->num_entries; n++) {
5890                struct tc_taprio_sched_entry *e = &qopt->entries[n];
5891                int i;
5892
5893                end_time += e->interval;
5894
5895                for (i = 0; i < adapter->num_tx_queues; i++) {
5896                        struct igc_ring *ring = adapter->tx_ring[i];
5897
5898                        if (!(e->gate_mask & BIT(i)))
5899                                continue;
5900
5901                        ring->start_time = start_time;
5902                        ring->end_time = end_time;
5903                }
5904
5905                start_time += e->interval;
5906        }
5907
5908        return 0;
5909}
5910
5911static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
5912                                         struct tc_taprio_qopt_offload *qopt)
5913{
5914        struct igc_hw *hw = &adapter->hw;
5915        int err;
5916
5917        if (hw->mac.type != igc_i225)
5918                return -EOPNOTSUPP;
5919
5920        err = igc_save_qbv_schedule(adapter, qopt);
5921        if (err)
5922                return err;
5923
5924        return igc_tsn_offload_apply(adapter);
5925}
5926
5927static int igc_save_cbs_params(struct igc_adapter *adapter, int queue,
5928                               bool enable, int idleslope, int sendslope,
5929                               int hicredit, int locredit)
5930{
5931        bool cbs_status[IGC_MAX_SR_QUEUES] = { false };
5932        struct net_device *netdev = adapter->netdev;
5933        struct igc_ring *ring;
5934        int i;
5935
5936        /* i225 has two sets of credit-based shaper logic, so CBS is
5937         * supported only on the two highest priority queues.
5938         */
5939        if (queue < 0 || queue > 1)
5940                return -EINVAL;
5941
5942        ring = adapter->tx_ring[queue];
5943
5944        for (i = 0; i < IGC_MAX_SR_QUEUES; i++)
5945                if (adapter->tx_ring[i])
5946                        cbs_status[i] = adapter->tx_ring[i]->cbs_enable;
5947
5948        /* CBS should be enabled on the highest priority queue first in order
5949         * for the CBS algorithm to operate as intended.
5950         */
5951        if (enable) {
5952                if (queue == 1 && !cbs_status[0]) {
5953                        netdev_err(netdev,
5954                                   "Enabling CBS on queue1 before queue0\n");
5955                        return -EINVAL;
5956                }
5957        } else {
5958                if (queue == 0 && cbs_status[1]) {
5959                        netdev_err(netdev,
5960                                   "Disabling CBS on queue0 before queue1\n");
5961                        return -EINVAL;
5962                }
5963        }
5964
5965        ring->cbs_enable = enable;
5966        ring->idleslope = idleslope;
5967        ring->sendslope = sendslope;
5968        ring->hicredit = hicredit;
5969        ring->locredit = locredit;
5970
5971        return 0;
5972}
5973
5974static int igc_tsn_enable_cbs(struct igc_adapter *adapter,
5975                              struct tc_cbs_qopt_offload *qopt)
5976{
5977        struct igc_hw *hw = &adapter->hw;
5978        int err;
5979
5980        if (hw->mac.type != igc_i225)
5981                return -EOPNOTSUPP;
5982
5983        if (qopt->queue < 0 || qopt->queue > 1)
5984                return -EINVAL;
5985
5986        err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable,
5987                                  qopt->idleslope, qopt->sendslope,
5988                                  qopt->hicredit, qopt->locredit);
5989        if (err)
5990                return err;
5991
5992        return igc_tsn_offload_apply(adapter);
5993}
5994
5995static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
5996                        void *type_data)
5997{
5998        struct igc_adapter *adapter = netdev_priv(dev);
5999
6000        switch (type) {
6001        case TC_SETUP_QDISC_TAPRIO:
6002                return igc_tsn_enable_qbv_scheduling(adapter, type_data);
6003
6004        case TC_SETUP_QDISC_ETF:
6005                return igc_tsn_enable_launchtime(adapter, type_data);
6006
6007        case TC_SETUP_QDISC_CBS:
6008                return igc_tsn_enable_cbs(adapter, type_data);
6009
6010        default:
6011                return -EOPNOTSUPP;
6012        }
6013}
6014
6015static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
6016{
6017        struct igc_adapter *adapter = netdev_priv(dev);
6018
6019        switch (bpf->command) {
6020        case XDP_SETUP_PROG:
6021                return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
6022        case XDP_SETUP_XSK_POOL:
6023                return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
6024                                          bpf->xsk.queue_id);
6025        default:
6026                return -EOPNOTSUPP;
6027        }
6028}
6029
6030static int igc_xdp_xmit(struct net_device *dev, int num_frames,
6031                        struct xdp_frame **frames, u32 flags)
6032{
6033        struct igc_adapter *adapter = netdev_priv(dev);
6034        int cpu = smp_processor_id();
6035        struct netdev_queue *nq;
6036        struct igc_ring *ring;
6037        int i, drops;
6038
6039        if (unlikely(test_bit(__IGC_DOWN, &adapter->state)))
6040                return -ENETDOWN;
6041
6042        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
6043                return -EINVAL;
6044
6045        ring = igc_xdp_get_tx_ring(adapter, cpu);
6046        nq = txring_txq(ring);
6047
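        /* The chosen Tx ring is shared with the regular transmit path, so
         * hold the netdev queue lock while queueing the XDP frames.
         */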
6048        __netif_tx_lock(nq, cpu);
6049
6050        drops = 0;
6051        for (i = 0; i < num_frames; i++) {
6052                int err;
6053                struct xdp_frame *xdpf = frames[i];
6054
6055                err = igc_xdp_init_tx_descriptor(ring, xdpf);
6056                if (err) {
6057                        xdp_return_frame_rx_napi(xdpf);
6058                        drops++;
6059                }
6060        }
6061
6062        if (flags & XDP_XMIT_FLUSH)
6063                igc_flush_tx_descriptors(ring);
6064
6065        __netif_tx_unlock(nq);
6066
6067        return num_frames - drops;
6068}
6069
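/* Raise a software-generated MSI-X interrupt for the queue's vector by
 * writing its EIMS bit to EICS, forcing the corresponding NAPI poll to run.
 */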
6070static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
6071                                        struct igc_q_vector *q_vector)
6072{
6073        struct igc_hw *hw = &adapter->hw;
6074        u32 eics = 0;
6075
6076        eics |= q_vector->eims_value;
6077        wr32(IGC_EICS, eics);
6078}
6079
6080int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
6081{
6082        struct igc_adapter *adapter = netdev_priv(dev);
6083        struct igc_q_vector *q_vector;
6084        struct igc_ring *ring;
6085
6086        if (test_bit(__IGC_DOWN, &adapter->state))
6087                return -ENETDOWN;
6088
6089        if (!igc_xdp_is_enabled(adapter))
6090                return -ENXIO;
6091
6092        if (queue_id >= adapter->num_rx_queues)
6093                return -EINVAL;
6094
6095        ring = adapter->rx_ring[queue_id];
6096
6097        if (!ring->xsk_pool)
6098                return -ENXIO;
6099
6100        q_vector = adapter->q_vector[queue_id];
6101        if (!napi_if_scheduled_mark_missed(&q_vector->napi))
6102                igc_trigger_rxtxq_interrupt(adapter, q_vector);
6103
6104        return 0;
6105}
6106
6107static const struct net_device_ops igc_netdev_ops = {
6108        .ndo_open               = igc_open,
6109        .ndo_stop               = igc_close,
6110        .ndo_start_xmit         = igc_xmit_frame,
6111        .ndo_set_rx_mode        = igc_set_rx_mode,
6112        .ndo_set_mac_address    = igc_set_mac,
6113        .ndo_change_mtu         = igc_change_mtu,
6114        .ndo_get_stats64        = igc_get_stats64,
6115        .ndo_fix_features       = igc_fix_features,
6116        .ndo_set_features       = igc_set_features,
6117        .ndo_features_check     = igc_features_check,
6118        .ndo_eth_ioctl          = igc_ioctl,
6119        .ndo_setup_tc           = igc_setup_tc,
6120        .ndo_bpf                = igc_bpf,
6121        .ndo_xdp_xmit           = igc_xdp_xmit,
6122        .ndo_xsk_wakeup         = igc_xsk_wakeup,
6123};
6124
6125/* PCIe configuration access */
6126void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
6127{
6128        struct igc_adapter *adapter = hw->back;
6129
6130        pci_read_config_word(adapter->pdev, reg, value);
6131}
6132
6133void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
6134{
6135        struct igc_adapter *adapter = hw->back;
6136
6137        pci_write_config_word(adapter->pdev, reg, *value);
6138}
6139
6140s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
6141{
6142        struct igc_adapter *adapter = hw->back;
6143
6144        if (!pci_is_pcie(adapter->pdev))
6145                return -IGC_ERR_CONFIG;
6146
6147        pcie_capability_read_word(adapter->pdev, reg, value);
6148
6149        return IGC_SUCCESS;
6150}
6151
6152s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
6153{
6154        struct igc_adapter *adapter = hw->back;
6155
6156        if (!pci_is_pcie(adapter->pdev))
6157                return -IGC_ERR_CONFIG;
6158
6159        pcie_capability_write_word(adapter->pdev, reg, *value);
6160
6161        return IGC_SUCCESS;
6162}
6163
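/* igc_rd32 - read a 32-bit device register. An all-ones read is cross-checked
 * against register offset 0 to detect a lost PCIe link (surprise removal);
 * in that case the register mapping is cleared and the netdev is detached.
 */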
6164u32 igc_rd32(struct igc_hw *hw, u32 reg)
6165{
6166        struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
6167        u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
6168        u32 value = 0;
6169
6170        value = readl(&hw_addr[reg]);
6171
6172        /* reads should not return all F's */
6173        if (!(~value) && (!reg || !(~readl(hw_addr)))) {
6174                struct net_device *netdev = igc->netdev;
6175
6176                hw->hw_addr = NULL;
6177                netif_device_detach(netdev);
6178                netdev_err(netdev, "PCIe link lost, device now detached\n");
6179                WARN(pci_device_is_present(igc->pdev),
6180                     "igc: Failed to read reg 0x%x!\n", reg);
6181        }
6182
6183        return value;
6184}
6185
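/* igc_set_spd_dplx - apply a user-requested forced speed/duplex setting.
 * 10/100 Mbps settings disable autoneg and force the link; 1000/2500 Mbps
 * full duplex keep autoneg enabled and restrict the advertised modes.
 * Returns -EINVAL for unsupported combinations.
 */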
6186int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
6187{
6188        struct igc_mac_info *mac = &adapter->hw.mac;
6189
6190        mac->autoneg = false;
6191
6192        /* Make sure dplx is at most 1 bit and lsb of speed is not set
6193         * for the switch() below to work
6194         */
6195        if ((spd & 1) || (dplx & ~1))
6196                goto err_inval;
6197
6198        switch (spd + dplx) {
6199        case SPEED_10 + DUPLEX_HALF:
6200                mac->forced_speed_duplex = ADVERTISE_10_HALF;
6201                break;
6202        case SPEED_10 + DUPLEX_FULL:
6203                mac->forced_speed_duplex = ADVERTISE_10_FULL;
6204                break;
6205        case SPEED_100 + DUPLEX_HALF:
6206                mac->forced_speed_duplex = ADVERTISE_100_HALF;
6207                break;
6208        case SPEED_100 + DUPLEX_FULL:
6209                mac->forced_speed_duplex = ADVERTISE_100_FULL;
6210                break;
6211        case SPEED_1000 + DUPLEX_FULL:
6212                mac->autoneg = true;
6213                adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6214                break;
6215        case SPEED_1000 + DUPLEX_HALF: /* not supported */
6216                goto err_inval;
6217        case SPEED_2500 + DUPLEX_FULL:
6218                mac->autoneg = true;
6219                adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL;
6220                break;
6221        case SPEED_2500 + DUPLEX_HALF: /* not supported */
6222        default:
6223                goto err_inval;
6224        }
6225
6226        /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
6227        adapter->hw.phy.mdix = AUTO_ALL_MODES;
6228
6229        return 0;
6230
6231err_inval:
6232        netdev_err(adapter->netdev, "Unsupported Speed/Duplex configuration\n");
6233        return -EINVAL;
6234}
6235
6236/**
6237 * igc_probe - Device Initialization Routine
6238 * @pdev: PCI device information struct
6239 * @ent: entry in igc_pci_tbl
6240 *
6241 * Returns 0 on success, negative on failure
6242 *
6243 * igc_probe initializes an adapter identified by a pci_dev structure.
6244 * The OS initialization, configuring the adapter private structure,
6245 * and a hardware reset occur.
6246 */
6247static int igc_probe(struct pci_dev *pdev,
6248                     const struct pci_device_id *ent)
6249{
6250        struct igc_adapter *adapter;
6251        struct net_device *netdev;
6252        struct igc_hw *hw;
6253        const struct igc_info *ei = igc_info_tbl[ent->driver_data];
6254        int err, pci_using_dac;
6255
6256        err = pci_enable_device_mem(pdev);
6257        if (err)
6258                return err;
6259
6260        pci_using_dac = 0;
6261        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
6262        if (!err) {
6263                pci_using_dac = 1;
6264        } else {
6265                err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
6266                if (err) {
6267                        dev_err(&pdev->dev,
6268                                "No usable DMA configuration, aborting\n");
6269                        goto err_dma;
6270                }
6271        }
6272
6273        err = pci_request_mem_regions(pdev, igc_driver_name);
6274        if (err)
6275                goto err_pci_reg;
6276
6277        pci_enable_pcie_error_reporting(pdev);
6278
6279        err = pci_enable_ptm(pdev, NULL);
6280        if (err < 0)
6281                dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");
6282
6283        pci_set_master(pdev);
6284
6285        err = -ENOMEM;
6286        netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
6287                                   IGC_MAX_TX_QUEUES);
6288
6289        if (!netdev)
6290                goto err_alloc_etherdev;
6291
6292        SET_NETDEV_DEV(netdev, &pdev->dev);
6293
6294        pci_set_drvdata(pdev, netdev);
6295        adapter = netdev_priv(netdev);
6296        adapter->netdev = netdev;
6297        adapter->pdev = pdev;
6298        hw = &adapter->hw;
6299        hw->back = adapter;
6300        adapter->port_num = hw->bus.func;
6301        adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
6302
6303        err = pci_save_state(pdev);
6304        if (err)
6305                goto err_ioremap;
6306
6307        err = -EIO;
6308        adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
6309                                   pci_resource_len(pdev, 0));
6310        if (!adapter->io_addr)
6311                goto err_ioremap;
6312
6313        /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
6314        hw->hw_addr = adapter->io_addr;
6315
6316        netdev->netdev_ops = &igc_netdev_ops;
6317        igc_ethtool_set_ops(netdev);
6318        netdev->watchdog_timeo = 5 * HZ;
6319
6320        netdev->mem_start = pci_resource_start(pdev, 0);
6321        netdev->mem_end = pci_resource_end(pdev, 0);
6322
6323        /* PCI config space info */
6324        hw->vendor_id = pdev->vendor;
6325        hw->device_id = pdev->device;
6326        hw->revision_id = pdev->revision;
6327        hw->subsystem_vendor_id = pdev->subsystem_vendor;
6328        hw->subsystem_device_id = pdev->subsystem_device;
6329
6330        /* Copy the default MAC and PHY function pointers */
6331        memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
6332        memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
6333
6334        /* Initialize skew-specific constants */
6335        err = ei->get_invariants(hw);
6336        if (err)
6337                goto err_sw_init;
6338
6339        /* Add supported features to the features list */
6340        netdev->features |= NETIF_F_SG;
6341        netdev->features |= NETIF_F_TSO;
6342        netdev->features |= NETIF_F_TSO6;
6343        netdev->features |= NETIF_F_TSO_ECN;
6344        netdev->features |= NETIF_F_RXCSUM;
6345        netdev->features |= NETIF_F_HW_CSUM;
6346        netdev->features |= NETIF_F_SCTP_CRC;
6347        netdev->features |= NETIF_F_HW_TC;
6348
6349#define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
6350                                  NETIF_F_GSO_GRE_CSUM | \
6351                                  NETIF_F_GSO_IPXIP4 | \
6352                                  NETIF_F_GSO_IPXIP6 | \
6353                                  NETIF_F_GSO_UDP_TUNNEL | \
6354                                  NETIF_F_GSO_UDP_TUNNEL_CSUM)
6355
6356        netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES;
6357        netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES;
6358
6359        /* setup the private structure */
6360        err = igc_sw_init(adapter);
6361        if (err)
6362                goto err_sw_init;
6363
6364        /* copy netdev features into list of user selectable features */
6365        netdev->hw_features |= NETIF_F_NTUPLE;
6366        netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
6367        netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
6368        netdev->hw_features |= netdev->features;
6369
6370        if (pci_using_dac)
6371                netdev->features |= NETIF_F_HIGHDMA;
6372
6373        netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
6374        netdev->mpls_features |= NETIF_F_HW_CSUM;
6375        netdev->hw_enc_features |= netdev->vlan_features;
6376
6377        /* MTU range: 68 - 9216 */
6378        netdev->min_mtu = ETH_MIN_MTU;
6379        netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
6380
6381        /* before reading the NVM, reset the controller to put the device in a
6382         * known good starting state
6383         */
6384        hw->mac.ops.reset_hw(hw);
6385
6386        if (igc_get_flash_presence_i225(hw)) {
6387                if (hw->nvm.ops.validate(hw) < 0) {
6388                        dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
6389                        err = -EIO;
6390                        goto err_eeprom;
6391                }
6392        }
6393
6394        if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
6395                /* copy the MAC address out of the NVM */
6396                if (hw->mac.ops.read_mac_addr(hw))
6397                        dev_err(&pdev->dev, "NVM Read Error\n");
6398        }
6399
6400        eth_hw_addr_set(netdev, hw->mac.addr);
6401
6402        if (!is_valid_ether_addr(netdev->dev_addr)) {
6403                dev_err(&pdev->dev, "Invalid MAC Address\n");
6404                err = -EIO;
6405                goto err_eeprom;
6406        }
6407
6408        /* configure RXPBSIZE and TXPBSIZE */
6409        wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
6410        wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
6411
6412        timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
6413        timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
6414
6415        INIT_WORK(&adapter->reset_task, igc_reset_task);
6416        INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
6417
6418        /* Initialize link properties that are user-changeable */
6419        adapter->fc_autoneg = true;
6420        hw->mac.autoneg = true;
6421        hw->phy.autoneg_advertised = 0xaf;
6422
6423        hw->fc.requested_mode = igc_fc_default;
6424        hw->fc.current_mode = igc_fc_default;
6425
6426        /* By default, support wake on port A */
6427        adapter->flags |= IGC_FLAG_WOL_SUPPORTED;
6428
6429        /* initialize the wol settings based on the eeprom settings */
6430        if (adapter->flags & IGC_FLAG_WOL_SUPPORTED)
6431                adapter->wol |= IGC_WUFC_MAG;
6432
6433        device_set_wakeup_enable(&adapter->pdev->dev,
6434                                 adapter->flags & IGC_FLAG_WOL_SUPPORTED);
6435
6436        igc_ptp_init(adapter);
6437
6438        igc_tsn_clear_schedule(adapter);
6439
6440        /* reset the hardware with the new settings */
6441        igc_reset(adapter);
6442
6443        /* let the f/w know that the h/w is now under the control of the
6444         * driver.
6445         */
6446        igc_get_hw_control(adapter);
6447
6448        strncpy(netdev->name, "eth%d", IFNAMSIZ);
6449        err = register_netdev(netdev);
6450        if (err)
6451                goto err_register;
6452
6453        /* carrier off reporting is important to ethtool even BEFORE open */
6454        netif_carrier_off(netdev);
6455
6456        /* Keep a copy of the board-specific info */
6457        adapter->ei = *ei;
6458
6459        /* print pcie link status and MAC address */
6460        pcie_print_link_status(pdev);
6461        netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
6462
6463        dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
6464        /* Disable EEE for internal PHY devices */
6465        hw->dev_spec._base.eee_enable = false;
6466        adapter->flags &= ~IGC_FLAG_EEE;
6467        igc_set_eee_i225(hw, false, false, false);
6468
6469        pm_runtime_put_noidle(&pdev->dev);
6470
6471        return 0;
6472
6473err_register:
6474        igc_release_hw_control(adapter);
6475err_eeprom:
6476        if (!igc_check_reset_block(hw))
6477                igc_reset_phy(hw);
6478err_sw_init:
6479        igc_clear_interrupt_scheme(adapter);
6480        iounmap(adapter->io_addr);
6481err_ioremap:
6482        free_netdev(netdev);
6483err_alloc_etherdev:
6484        pci_disable_pcie_error_reporting(pdev);
6485        pci_release_mem_regions(pdev);
6486err_pci_reg:
6487err_dma:
6488        pci_disable_device(pdev);
6489        return err;
6490}
6491
6492/**
6493 * igc_remove - Device Removal Routine
6494 * @pdev: PCI device information struct
6495 *
6496 * igc_remove is called by the PCI subsystem to alert the driver
6497 * that it should release a PCI device.  This could be caused by a
6498 * Hot-Plug event, or because the driver is going to be removed from
6499 * memory.
6500 */
6501static void igc_remove(struct pci_dev *pdev)
6502{
6503        struct net_device *netdev = pci_get_drvdata(pdev);
6504        struct igc_adapter *adapter = netdev_priv(netdev);
6505
6506        pm_runtime_get_noresume(&pdev->dev);
6507
6508        igc_flush_nfc_rules(adapter);
6509
6510        igc_ptp_stop(adapter);
6511
6512        set_bit(__IGC_DOWN, &adapter->state);
6513
6514        del_timer_sync(&adapter->watchdog_timer);
6515        del_timer_sync(&adapter->phy_info_timer);
6516
6517        cancel_work_sync(&adapter->reset_task);
6518        cancel_work_sync(&adapter->watchdog_task);
6519
6520        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6521         * would have already happened in close and is redundant.
6522         */
6523        igc_release_hw_control(adapter);
6524        unregister_netdev(netdev);
6525
6526        igc_clear_interrupt_scheme(adapter);
6527        pci_iounmap(pdev, adapter->io_addr);
6528        pci_release_mem_regions(pdev);
6529
6530        free_netdev(netdev);
6531
6532        pci_disable_pcie_error_reporting(pdev);
6533
6534        pci_disable_device(pdev);
6535}
6536
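/* __igc_shutdown - common suspend/hibernate/shutdown path. Detaches and
 * closes the netdev, programs the Wake-on-LAN filters (link-change only for
 * runtime suspend), powers the PHY up or down accordingly, and reports via
 * *enable_wake whether the device should stay armed for wakeup.
 */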
6537static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake,
6538                          bool runtime)
6539{
6540        struct net_device *netdev = pci_get_drvdata(pdev);
6541        struct igc_adapter *adapter = netdev_priv(netdev);
6542        u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol;
6543        struct igc_hw *hw = &adapter->hw;
6544        u32 ctrl, rctl, status;
6545        bool wake;
6546
6547        rtnl_lock();
6548        netif_device_detach(netdev);
6549
6550        if (netif_running(netdev))
6551                __igc_close(netdev, true);
6552
6553        igc_ptp_suspend(adapter);
6554
6555        igc_clear_interrupt_scheme(adapter);
6556        rtnl_unlock();
6557
6558        status = rd32(IGC_STATUS);
6559        if (status & IGC_STATUS_LU)
6560                wufc &= ~IGC_WUFC_LNKC;
6561
6562        if (wufc) {
6563                igc_setup_rctl(adapter);
6564                igc_set_rx_mode(netdev);
6565
6566                /* turn on all-multi mode if wake on multicast is enabled */
6567                if (wufc & IGC_WUFC_MC) {
6568                        rctl = rd32(IGC_RCTL);
6569                        rctl |= IGC_RCTL_MPE;
6570                        wr32(IGC_RCTL, rctl);
6571                }
6572
6573                ctrl = rd32(IGC_CTRL);
6574                ctrl |= IGC_CTRL_ADVD3WUC;
6575                wr32(IGC_CTRL, ctrl);
6576
6577                /* Allow time for pending master requests to run */
6578                igc_disable_pcie_master(hw);
6579
6580                wr32(IGC_WUC, IGC_WUC_PME_EN);
6581                wr32(IGC_WUFC, wufc);
6582        } else {
6583                wr32(IGC_WUC, 0);
6584                wr32(IGC_WUFC, 0);
6585        }
6586
6587        wake = wufc || adapter->en_mng_pt;
6588        if (!wake)
6589                igc_power_down_phy_copper_base(&adapter->hw);
6590        else
6591                igc_power_up_link(adapter);
6592
6593        if (enable_wake)
6594                *enable_wake = wake;
6595
6596        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6597         * would have already happened in close and is redundant.
6598         */
6599        igc_release_hw_control(adapter);
6600
6601        pci_disable_device(pdev);
6602
6603        return 0;
6604}
6605
6606#ifdef CONFIG_PM
6607static int __maybe_unused igc_runtime_suspend(struct device *dev)
6608{
6609        return __igc_shutdown(to_pci_dev(dev), NULL, 1);
6610}
6611
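/* igc_deliver_wake_packet - retrieve the packet that triggered wakeup from
 * the Wake Up Packet Memory (WUPM) registers and hand it to the stack.
 */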
6612static void igc_deliver_wake_packet(struct net_device *netdev)
6613{
6614        struct igc_adapter *adapter = netdev_priv(netdev);
6615        struct igc_hw *hw = &adapter->hw;
6616        struct sk_buff *skb;
6617        u32 wupl;
6618
6619        wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;
6620
6621        /* WUPM stores only the first 128 bytes of the wake packet.
6622         * Read the packet only if we have the whole thing.
6623         */
6624        if (wupl == 0 || wupl > IGC_WUPM_BYTES)
6625                return;
6626
6627        skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES);
6628        if (!skb)
6629                return;
6630
6631        skb_put(skb, wupl);
6632
6633        /* Ensure reads are 32-bit aligned */
6634        wupl = roundup(wupl, 4);
6635
6636        memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl);
6637
6638        skb->protocol = eth_type_trans(skb, netdev);
6639        netif_rx(skb);
6640}
6641
6642static int __maybe_unused igc_resume(struct device *dev)
6643{
6644        struct pci_dev *pdev = to_pci_dev(dev);
6645        struct net_device *netdev = pci_get_drvdata(pdev);
6646        struct igc_adapter *adapter = netdev_priv(netdev);
6647        struct igc_hw *hw = &adapter->hw;
6648        u32 err, val;
6649
6650        pci_set_power_state(pdev, PCI_D0);
6651        pci_restore_state(pdev);
6652        pci_save_state(pdev);
6653
6654        if (!pci_device_is_present(pdev))
6655                return -ENODEV;
6656        err = pci_enable_device_mem(pdev);
6657        if (err) {
6658                netdev_err(netdev, "Cannot enable PCI device from suspend\n");
6659                return err;
6660        }
6661        pci_set_master(pdev);
6662
6663        pci_enable_wake(pdev, PCI_D3hot, 0);
6664        pci_enable_wake(pdev, PCI_D3cold, 0);
6665
6666        if (igc_init_interrupt_scheme(adapter, true)) {
6667                netdev_err(netdev, "Unable to allocate memory for queues\n");
6668                return -ENOMEM;
6669        }
6670
6671        igc_reset(adapter);
6672
6673        /* let the f/w know that the h/w is now under the control of the
6674         * driver.
6675         */
6676        igc_get_hw_control(adapter);
6677
6678        val = rd32(IGC_WUS);
6679        if (val & WAKE_PKT_WUS)
6680                igc_deliver_wake_packet(netdev);
6681
6682        wr32(IGC_WUS, ~0);
6683
6684        rtnl_lock();
6685        if (!err && netif_running(netdev))
6686                err = __igc_open(netdev, true);
6687
6688        if (!err)
6689                netif_device_attach(netdev);
6690        rtnl_unlock();
6691
6692        return err;
6693}
6694
6695static int __maybe_unused igc_runtime_resume(struct device *dev)
6696{
6697        return igc_resume(dev);
6698}
6699
6700static int __maybe_unused igc_suspend(struct device *dev)
6701{
6702        return __igc_shutdown(to_pci_dev(dev), NULL, 0);
6703}
6704
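/* igc_runtime_idle - runtime PM idle callback. Schedules a delayed runtime
 * suspend when there is no link, and always returns -EBUSY so the PM core
 * does not suspend the device immediately.
 */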
6705static int __maybe_unused igc_runtime_idle(struct device *dev)
6706{
6707        struct net_device *netdev = dev_get_drvdata(dev);
6708        struct igc_adapter *adapter = netdev_priv(netdev);
6709
6710        if (!igc_has_link(adapter))
6711                pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6712
6713        return -EBUSY;
6714}
6715#endif /* CONFIG_PM */
6716
6717static void igc_shutdown(struct pci_dev *pdev)
6718{
6719        bool wake;
6720
6721        __igc_shutdown(pdev, &wake, 0);
6722
6723        if (system_state == SYSTEM_POWER_OFF) {
6724                pci_wake_from_d3(pdev, wake);
6725                pci_set_power_state(pdev, PCI_D3hot);
6726        }
6727}
6728
6729/**
6730 *  igc_io_error_detected - called when PCI error is detected
6731 *  @pdev: Pointer to PCI device
6732 *  @state: The current PCI connection state
6733 *
6734 *  This function is called after a PCI bus error affecting
6735 *  this device has been detected.
6736 **/
6737static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
6738                                              pci_channel_state_t state)
6739{
6740        struct net_device *netdev = pci_get_drvdata(pdev);
6741        struct igc_adapter *adapter = netdev_priv(netdev);
6742
6743        netif_device_detach(netdev);
6744
6745        if (state == pci_channel_io_perm_failure)
6746                return PCI_ERS_RESULT_DISCONNECT;
6747
6748        if (netif_running(netdev))
6749                igc_down(adapter);
6750        pci_disable_device(pdev);
6751
6752        /* Request a slot reset. */
6753        return PCI_ERS_RESULT_NEED_RESET;
6754}
6755
6756/**
6757 *  igc_io_slot_reset - called after the PCI bus has been reset.
6758 *  @pdev: Pointer to PCI device
6759 *
6760 *  Restart the card from scratch, as if from a cold-boot. Implementation
6761 *  resembles the first-half of the igc_resume routine.
6762 **/
6763static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
6764{
6765        struct net_device *netdev = pci_get_drvdata(pdev);
6766        struct igc_adapter *adapter = netdev_priv(netdev);
6767        struct igc_hw *hw = &adapter->hw;
6768        pci_ers_result_t result;
6769
6770        if (pci_enable_device_mem(pdev)) {
6771                netdev_err(netdev, "Could not re-enable PCI device after reset\n");
6772                result = PCI_ERS_RESULT_DISCONNECT;
6773        } else {
6774                pci_set_master(pdev);
6775                pci_restore_state(pdev);
6776                pci_save_state(pdev);
6777
6778                pci_enable_wake(pdev, PCI_D3hot, 0);
6779                pci_enable_wake(pdev, PCI_D3cold, 0);
6780
6781                /* In case of PCI error, adapter loses its HW address
6782                 * so we should re-assign it here.
6783                 */
6784                hw->hw_addr = adapter->io_addr;
6785
6786                igc_reset(adapter);
6787                wr32(IGC_WUS, ~0);
6788                result = PCI_ERS_RESULT_RECOVERED;
6789        }
6790
6791        return result;
6792}
6793
6794/**
6795 *  igc_io_resume - called when traffic can start to flow again.
6796 *  @pdev: Pointer to PCI device
6797 *
6798 *  This callback is called when the error recovery driver tells us that
6799 *  its OK to resume normal operation. Implementation resembles the
6800 *  it's OK to resume normal operation. Implementation resembles the
6801 */
6802static void igc_io_resume(struct pci_dev *pdev)
6803{
6804        struct net_device *netdev = pci_get_drvdata(pdev);
6805        struct igc_adapter *adapter = netdev_priv(netdev);
6806
6807        rtnl_lock();
6808        if (netif_running(netdev)) {
6809                if (igc_open(netdev)) {
6810                        netdev_err(netdev, "igc_open failed after reset\n");
                            /* drop the rtnl_lock taken above before bailing out */
                            rtnl_unlock();
6811                        return;
6812                }
6813        }
6814
6815        netif_device_attach(netdev);
6816
6817        /* let the f/w know that the h/w is now under the control of the
6818         * driver.
6819         */
6820        igc_get_hw_control(adapter);
6821        rtnl_unlock();
6822}
6823
6824static const struct pci_error_handlers igc_err_handler = {
6825        .error_detected = igc_io_error_detected,
6826        .slot_reset = igc_io_slot_reset,
6827        .resume = igc_io_resume,
6828};
6829
6830#ifdef CONFIG_PM
6831static const struct dev_pm_ops igc_pm_ops = {
6832        SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
6833        SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
6834                           igc_runtime_idle)
6835};
6836#endif
6837
6838static struct pci_driver igc_driver = {
6839        .name     = igc_driver_name,
6840        .id_table = igc_pci_tbl,
6841        .probe    = igc_probe,
6842        .remove   = igc_remove,
6843#ifdef CONFIG_PM
6844        .driver.pm = &igc_pm_ops,
6845#endif
6846        .shutdown = igc_shutdown,
6847        .err_handler = &igc_err_handler,
6848};
6849
6850/**
6851 * igc_reinit_queues - reinitialize the interrupt scheme and queues
6852 * @adapter: pointer to adapter structure
6853 */
6854int igc_reinit_queues(struct igc_adapter *adapter)
6855{
6856        struct net_device *netdev = adapter->netdev;
6857        int err = 0;
6858
6859        if (netif_running(netdev))
6860                igc_close(netdev);
6861
6862        igc_reset_interrupt_capability(adapter);
6863
6864        if (igc_init_interrupt_scheme(adapter, true)) {
6865                netdev_err(netdev, "Unable to allocate memory for queues\n");
6866                return -ENOMEM;
6867        }
6868
6869        if (netif_running(netdev))
6870                err = igc_open(netdev);
6871
6872        return err;
6873}
6874
6875/**
6876 * igc_get_hw_dev - return device
6877 * @hw: pointer to hardware structure
6878 *
6879 * used by hardware layer to print debugging information
6880 */
6881struct net_device *igc_get_hw_dev(struct igc_hw *hw)
6882{
6883        struct igc_adapter *adapter = hw->back;
6884
6885        return adapter->netdev;
6886}
6887
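/* Stop an RX queue in hardware: clear the queue enable bit and request a
 * software flush of pending descriptors via RXDCTL.
 */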
6888static void igc_disable_rx_ring_hw(struct igc_ring *ring)
6889{
6890        struct igc_hw *hw = &ring->q_vector->adapter->hw;
6891        u8 idx = ring->reg_idx;
6892        u32 rxdctl;
6893
6894        rxdctl = rd32(IGC_RXDCTL(idx));
6895        rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
6896        rxdctl |= IGC_RXDCTL_SWFLUSH;
6897        wr32(IGC_RXDCTL(idx), rxdctl);
6898}
6899
6900void igc_disable_rx_ring(struct igc_ring *ring)
6901{
6902        igc_disable_rx_ring_hw(ring);
6903        igc_clean_rx_ring(ring);
6904}
6905
6906void igc_enable_rx_ring(struct igc_ring *ring)
6907{
6908        struct igc_adapter *adapter = ring->q_vector->adapter;
6909
6910        igc_configure_rx_ring(adapter, ring);
6911
6912        if (ring->xsk_pool)
6913                igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
6914        else
6915                igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
6916}
6917
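/* Stop a TX queue in hardware: clear the queue enable bit and request a
 * software flush via TXDCTL.
 */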
6918static void igc_disable_tx_ring_hw(struct igc_ring *ring)
6919{
6920        struct igc_hw *hw = &ring->q_vector->adapter->hw;
6921        u8 idx = ring->reg_idx;
6922        u32 txdctl;
6923
6924        txdctl = rd32(IGC_TXDCTL(idx));
6925        txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
6926        txdctl |= IGC_TXDCTL_SWFLUSH;
6927        wr32(IGC_TXDCTL(idx), txdctl);
6928}
6929
6930void igc_disable_tx_ring(struct igc_ring *ring)
6931{
6932        igc_disable_tx_ring_hw(ring);
6933        igc_clean_tx_ring(ring);
6934}
6935
6936void igc_enable_tx_ring(struct igc_ring *ring)
6937{
6938        struct igc_adapter *adapter = ring->q_vector->adapter;
6939
6940        igc_configure_tx_ring(adapter, ring);
6941}
6942
6943/**
6944 * igc_init_module - Driver Registration Routine
6945 *
6946 * igc_init_module is the first routine called when the driver is
6947 * loaded. All it does is register with the PCI subsystem.
6948 */
6949static int __init igc_init_module(void)
6950{
6951        int ret;
6952
6953        pr_info("%s\n", igc_driver_string);
6954        pr_info("%s\n", igc_copyright);
6955
6956        ret = pci_register_driver(&igc_driver);
6957        return ret;
6958}
6959
6960module_init(igc_init_module);
6961
6962/**
6963 * igc_exit_module - Driver Exit Cleanup Routine
6964 *
6965 * igc_exit_module is called just before the driver is removed
6966 * from memory.
6967 */
6968static void __exit igc_exit_module(void)
6969{
6970        pci_unregister_driver(&igc_driver);
6971}
6972
6973module_exit(igc_exit_module);
6974/* igc_main.c */
6975