linux/drivers/net/ethernet/intel/igc/igc_main.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c)  2018 Intel Corporation */
   3
   4#include <linux/module.h>
   5#include <linux/types.h>
   6#include <linux/if_vlan.h>
   7#include <linux/aer.h>
   8#include <linux/tcp.h>
   9#include <linux/udp.h>
  10#include <linux/ip.h>
  11#include <linux/pm_runtime.h>
  12#include <net/pkt_sched.h>
  13#include <linux/bpf_trace.h>
  14#include <net/xdp_sock_drv.h>
  15#include <linux/pci.h>
  16
  17#include <net/ipv6.h>
  18
  19#include "igc.h"
  20#include "igc_hw.h"
  21#include "igc_tsn.h"
  22#include "igc_xdp.h"
  23
  24#define DRV_SUMMARY     "Intel(R) 2.5G Ethernet Linux Driver"
  25
  26#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
  27
  28#define IGC_XDP_PASS            0
  29#define IGC_XDP_CONSUMED        BIT(0)
  30#define IGC_XDP_TX              BIT(1)
  31#define IGC_XDP_REDIRECT        BIT(2)
  32
  33static int debug = -1;
  34
  35MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
  36MODULE_DESCRIPTION(DRV_SUMMARY);
  37MODULE_LICENSE("GPL v2");
  38module_param(debug, int, 0);
  39MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
  40
  41char igc_driver_name[] = "igc";
  42static const char igc_driver_string[] = DRV_SUMMARY;
  43static const char igc_copyright[] =
  44        "Copyright(c) 2018 Intel Corporation.";
  45
  46static const struct igc_info *igc_info_tbl[] = {
  47        [board_base] = &igc_base_info,
  48};
  49
  50static const struct pci_device_id igc_pci_tbl[] = {
  51        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
  52        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
  53        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
  54        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
  55        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
  56        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
  57        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base },
  58        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
  59        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base },
  60        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
  61        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
  62        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base },
  63        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base },
  64        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base },
  65        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base },
  66        { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
  67        /* required last entry */
  68        {0, }
  69};
  70
  71MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
  72
  73enum latency_range {
  74        lowest_latency = 0,
  75        low_latency = 1,
  76        bulk_latency = 2,
  77        latency_invalid = 255
  78};
  79
  80void igc_reset(struct igc_adapter *adapter)
  81{
  82        struct net_device *dev = adapter->netdev;
  83        struct igc_hw *hw = &adapter->hw;
  84        struct igc_fc_info *fc = &hw->fc;
  85        u32 pba, hwm;
  86
  87        /* Repartition PBA for greater than 9k MTU if required */
  88        pba = IGC_PBA_34K;
  89
  90        /* flow control settings
  91         * The high water mark must be low enough to fit one full frame
  92         * after transmitting the pause frame.  As such we must have enough
  93         * space to allow for us to complete our current transmit and then
  94         * receive the frame that is in progress from the link partner.
  95         * Set it to:
  96         * - the full Rx FIFO size minus one full Tx plus one full Rx frame
  97         */
  98        hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
  99
 100        fc->high_water = hwm & 0xFFFFFFF0;      /* 16-byte granularity */
 101        fc->low_water = fc->high_water - 16;
 102        fc->pause_time = 0xFFFF;
 103        fc->send_xon = 1;
 104        fc->current_mode = fc->requested_mode;
 105
 106        hw->mac.ops.reset_hw(hw);
 107
 108        if (hw->mac.ops.init_hw(hw))
 109                netdev_err(dev, "Error on hardware initialization\n");
 110
 111        /* Re-establish EEE setting */
 112        igc_set_eee_i225(hw, true, true, true);
 113
 114        if (!netif_running(adapter->netdev))
 115                igc_power_down_phy_copper_base(&adapter->hw);
 116
 117        /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */
 118        wr32(IGC_VET, ETH_P_8021Q);
 119
 120        /* Re-enable PTP, where applicable. */
 121        igc_ptp_reset(adapter);
 122
 123        /* Re-enable TSN offloading, where applicable. */
 124        igc_tsn_reset(adapter);
 125
 126        igc_get_phy_info(hw);
 127}
 128
 129/**
 130 * igc_power_up_link - Power up the phy link
 131 * @adapter: address of board private structure
 132 */
 133static void igc_power_up_link(struct igc_adapter *adapter)
 134{
 135        igc_reset_phy(&adapter->hw);
 136
 137        igc_power_up_phy_copper(&adapter->hw);
 138
 139        igc_setup_link(&adapter->hw);
 140}
 141
 142/**
 143 * igc_release_hw_control - release control of the h/w to f/w
 144 * @adapter: address of board private structure
 145 *
 146 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 147 * For ASF and Pass Through versions of f/w this means that the
 148 * driver is no longer loaded.
 149 */
 150static void igc_release_hw_control(struct igc_adapter *adapter)
 151{
 152        struct igc_hw *hw = &adapter->hw;
 153        u32 ctrl_ext;
 154
 155        if (!pci_device_is_present(adapter->pdev))
 156                return;
 157
 158        /* Let firmware take over control of h/w */
 159        ctrl_ext = rd32(IGC_CTRL_EXT);
 160        wr32(IGC_CTRL_EXT,
 161             ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
 162}
 163
 164/**
 165 * igc_get_hw_control - get control of the h/w from f/w
 166 * @adapter: address of board private structure
 167 *
 168 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 169 * For ASF and Pass Through versions of f/w this means that
 170 * the driver is loaded.
 171 */
 172static void igc_get_hw_control(struct igc_adapter *adapter)
 173{
 174        struct igc_hw *hw = &adapter->hw;
 175        u32 ctrl_ext;
 176
 177        /* Let firmware know the driver has taken over */
 178        ctrl_ext = rd32(IGC_CTRL_EXT);
 179        wr32(IGC_CTRL_EXT,
 180             ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
 181}
 182
 183static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf)
 184{
 185        dma_unmap_single(dev, dma_unmap_addr(buf, dma),
 186                         dma_unmap_len(buf, len), DMA_TO_DEVICE);
 187
 188        dma_unmap_len_set(buf, len, 0);
 189}
 190
 191/**
 192 * igc_clean_tx_ring - Free Tx Buffers
 193 * @tx_ring: ring to be cleaned
 194 */
 195static void igc_clean_tx_ring(struct igc_ring *tx_ring)
 196{
 197        u16 i = tx_ring->next_to_clean;
 198        struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
 199        u32 xsk_frames = 0;
 200
 201        while (i != tx_ring->next_to_use) {
 202                union igc_adv_tx_desc *eop_desc, *tx_desc;
 203
 204                switch (tx_buffer->type) {
 205                case IGC_TX_BUFFER_TYPE_XSK:
 206                        xsk_frames++;
 207                        break;
 208                case IGC_TX_BUFFER_TYPE_XDP:
 209                        xdp_return_frame(tx_buffer->xdpf);
 210                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 211                        break;
 212                case IGC_TX_BUFFER_TYPE_SKB:
 213                        dev_kfree_skb_any(tx_buffer->skb);
 214                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 215                        break;
 216                default:
 217                        netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
 218                        break;
 219                }
 220
 221                /* check for eop_desc to determine the end of the packet */
 222                eop_desc = tx_buffer->next_to_watch;
 223                tx_desc = IGC_TX_DESC(tx_ring, i);
 224
 225                /* unmap remaining buffers */
 226                while (tx_desc != eop_desc) {
 227                        tx_buffer++;
 228                        tx_desc++;
 229                        i++;
 230                        if (unlikely(i == tx_ring->count)) {
 231                                i = 0;
 232                                tx_buffer = tx_ring->tx_buffer_info;
 233                                tx_desc = IGC_TX_DESC(tx_ring, 0);
 234                        }
 235
 236                        /* unmap any remaining paged data */
 237                        if (dma_unmap_len(tx_buffer, len))
 238                                igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
 239                }
 240
 241                tx_buffer->next_to_watch = NULL;
 242
 243                /* move us one more past the eop_desc for start of next pkt */
 244                tx_buffer++;
 245                i++;
 246                if (unlikely(i == tx_ring->count)) {
 247                        i = 0;
 248                        tx_buffer = tx_ring->tx_buffer_info;
 249                }
 250        }
 251
 252        if (tx_ring->xsk_pool && xsk_frames)
 253                xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
 254
 255        /* reset BQL for queue */
 256        netdev_tx_reset_queue(txring_txq(tx_ring));
 257
 258        /* reset next_to_use and next_to_clean */
 259        tx_ring->next_to_use = 0;
 260        tx_ring->next_to_clean = 0;
 261}
 262
 263/**
 264 * igc_free_tx_resources - Free Tx Resources per Queue
 265 * @tx_ring: Tx descriptor ring for a specific queue
 266 *
 267 * Free all transmit software resources
 268 */
 269void igc_free_tx_resources(struct igc_ring *tx_ring)
 270{
 271        igc_clean_tx_ring(tx_ring);
 272
 273        vfree(tx_ring->tx_buffer_info);
 274        tx_ring->tx_buffer_info = NULL;
 275
 276        /* if not set, then don't free */
 277        if (!tx_ring->desc)
 278                return;
 279
 280        dma_free_coherent(tx_ring->dev, tx_ring->size,
 281                          tx_ring->desc, tx_ring->dma);
 282
 283        tx_ring->desc = NULL;
 284}
 285
 286/**
 287 * igc_free_all_tx_resources - Free Tx Resources for All Queues
 288 * @adapter: board private structure
 289 *
 290 * Free all transmit software resources
 291 */
 292static void igc_free_all_tx_resources(struct igc_adapter *adapter)
 293{
 294        int i;
 295
 296        for (i = 0; i < adapter->num_tx_queues; i++)
 297                igc_free_tx_resources(adapter->tx_ring[i]);
 298}
 299
 300/**
 301 * igc_clean_all_tx_rings - Free Tx Buffers for all queues
 302 * @adapter: board private structure
 303 */
 304static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
 305{
 306        int i;
 307
 308        for (i = 0; i < adapter->num_tx_queues; i++)
 309                if (adapter->tx_ring[i])
 310                        igc_clean_tx_ring(adapter->tx_ring[i]);
 311}
 312
 313/**
 314 * igc_setup_tx_resources - allocate Tx resources (Descriptors)
 315 * @tx_ring: tx descriptor ring (for a specific queue) to setup
 316 *
 317 * Return 0 on success, negative on failure
 318 */
 319int igc_setup_tx_resources(struct igc_ring *tx_ring)
 320{
 321        struct net_device *ndev = tx_ring->netdev;
 322        struct device *dev = tx_ring->dev;
 323        int size = 0;
 324
 325        size = sizeof(struct igc_tx_buffer) * tx_ring->count;
 326        tx_ring->tx_buffer_info = vzalloc(size);
 327        if (!tx_ring->tx_buffer_info)
 328                goto err;
 329
 330        /* round up to nearest 4K */
 331        tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
 332        tx_ring->size = ALIGN(tx_ring->size, 4096);
 333
 334        tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 335                                           &tx_ring->dma, GFP_KERNEL);
 336
 337        if (!tx_ring->desc)
 338                goto err;
 339
 340        tx_ring->next_to_use = 0;
 341        tx_ring->next_to_clean = 0;
 342
 343        return 0;
 344
 345err:
 346        vfree(tx_ring->tx_buffer_info);
 347        netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n");
 348        return -ENOMEM;
 349}
 350
 351/**
 352 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
 353 * @adapter: board private structure
 354 *
 355 * Return 0 on success, negative on failure
 356 */
 357static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
 358{
 359        struct net_device *dev = adapter->netdev;
 360        int i, err = 0;
 361
 362        for (i = 0; i < adapter->num_tx_queues; i++) {
 363                err = igc_setup_tx_resources(adapter->tx_ring[i]);
 364                if (err) {
 365                        netdev_err(dev, "Error on Tx queue %u setup\n", i);
 366                        for (i--; i >= 0; i--)
 367                                igc_free_tx_resources(adapter->tx_ring[i]);
 368                        break;
 369                }
 370        }
 371
 372        return err;
 373}
 374
 375static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
 376{
 377        u16 i = rx_ring->next_to_clean;
 378
 379        dev_kfree_skb(rx_ring->skb);
 380        rx_ring->skb = NULL;
 381
 382        /* Free all the Rx ring sk_buffs */
 383        while (i != rx_ring->next_to_alloc) {
 384                struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
 385
 386                /* Invalidate cache lines that may have been written to by
 387                 * device so that we avoid corrupting memory.
 388                 */
 389                dma_sync_single_range_for_cpu(rx_ring->dev,
 390                                              buffer_info->dma,
 391                                              buffer_info->page_offset,
 392                                              igc_rx_bufsz(rx_ring),
 393                                              DMA_FROM_DEVICE);
 394
 395                /* free resources associated with mapping */
 396                dma_unmap_page_attrs(rx_ring->dev,
 397                                     buffer_info->dma,
 398                                     igc_rx_pg_size(rx_ring),
 399                                     DMA_FROM_DEVICE,
 400                                     IGC_RX_DMA_ATTR);
 401                __page_frag_cache_drain(buffer_info->page,
 402                                        buffer_info->pagecnt_bias);
 403
 404                i++;
 405                if (i == rx_ring->count)
 406                        i = 0;
 407        }
 408}
 409
 410static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring)
 411{
 412        struct igc_rx_buffer *bi;
 413        u16 i;
 414
 415        for (i = 0; i < ring->count; i++) {
 416                bi = &ring->rx_buffer_info[i];
 417                if (!bi->xdp)
 418                        continue;
 419
 420                xsk_buff_free(bi->xdp);
 421                bi->xdp = NULL;
 422        }
 423}
 424
 425/**
 426 * igc_clean_rx_ring - Free Rx Buffers per Queue
 427 * @ring: ring to free buffers from
 428 */
 429static void igc_clean_rx_ring(struct igc_ring *ring)
 430{
 431        if (ring->xsk_pool)
 432                igc_clean_rx_ring_xsk_pool(ring);
 433        else
 434                igc_clean_rx_ring_page_shared(ring);
 435
 436        clear_ring_uses_large_buffer(ring);
 437
 438        ring->next_to_alloc = 0;
 439        ring->next_to_clean = 0;
 440        ring->next_to_use = 0;
 441}
 442
 443/**
 444 * igc_clean_all_rx_rings - Free Rx Buffers for all queues
 445 * @adapter: board private structure
 446 */
 447static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
 448{
 449        int i;
 450
 451        for (i = 0; i < adapter->num_rx_queues; i++)
 452                if (adapter->rx_ring[i])
 453                        igc_clean_rx_ring(adapter->rx_ring[i]);
 454}
 455
 456/**
 457 * igc_free_rx_resources - Free Rx Resources
 458 * @rx_ring: ring to clean the resources from
 459 *
 460 * Free all receive software resources
 461 */
 462void igc_free_rx_resources(struct igc_ring *rx_ring)
 463{
 464        igc_clean_rx_ring(rx_ring);
 465
 466        xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 467
 468        vfree(rx_ring->rx_buffer_info);
 469        rx_ring->rx_buffer_info = NULL;
 470
 471        /* if not set, then don't free */
 472        if (!rx_ring->desc)
 473                return;
 474
 475        dma_free_coherent(rx_ring->dev, rx_ring->size,
 476                          rx_ring->desc, rx_ring->dma);
 477
 478        rx_ring->desc = NULL;
 479}
 480
 481/**
 482 * igc_free_all_rx_resources - Free Rx Resources for All Queues
 483 * @adapter: board private structure
 484 *
 485 * Free all receive software resources
 486 */
 487static void igc_free_all_rx_resources(struct igc_adapter *adapter)
 488{
 489        int i;
 490
 491        for (i = 0; i < adapter->num_rx_queues; i++)
 492                igc_free_rx_resources(adapter->rx_ring[i]);
 493}
 494
 495/**
 496 * igc_setup_rx_resources - allocate Rx resources (Descriptors)
 497 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
 498 *
 499 * Returns 0 on success, negative on failure
 500 */
 501int igc_setup_rx_resources(struct igc_ring *rx_ring)
 502{
 503        struct net_device *ndev = rx_ring->netdev;
 504        struct device *dev = rx_ring->dev;
 505        u8 index = rx_ring->queue_index;
 506        int size, desc_len, res;
 507
 508        /* XDP RX-queue info */
 509        if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
 510                xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 511        res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
 512                               rx_ring->q_vector->napi.napi_id);
 513        if (res < 0) {
 514                netdev_err(ndev, "Failed to register xdp_rxq index %u\n",
 515                           index);
 516                return res;
 517        }
 518
 519        size = sizeof(struct igc_rx_buffer) * rx_ring->count;
 520        rx_ring->rx_buffer_info = vzalloc(size);
 521        if (!rx_ring->rx_buffer_info)
 522                goto err;
 523
 524        desc_len = sizeof(union igc_adv_rx_desc);
 525
 526        /* Round up to nearest 4K */
 527        rx_ring->size = rx_ring->count * desc_len;
 528        rx_ring->size = ALIGN(rx_ring->size, 4096);
 529
 530        rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
 531                                           &rx_ring->dma, GFP_KERNEL);
 532
 533        if (!rx_ring->desc)
 534                goto err;
 535
 536        rx_ring->next_to_alloc = 0;
 537        rx_ring->next_to_clean = 0;
 538        rx_ring->next_to_use = 0;
 539
 540        return 0;
 541
 542err:
 543        xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 544        vfree(rx_ring->rx_buffer_info);
 545        rx_ring->rx_buffer_info = NULL;
 546        netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
 547        return -ENOMEM;
 548}
 549
 550/**
 551 * igc_setup_all_rx_resources - wrapper to allocate Rx resources
 552 *                                (Descriptors) for all queues
 553 * @adapter: board private structure
 554 *
 555 * Return 0 on success, negative on failure
 556 */
 557static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
 558{
 559        struct net_device *dev = adapter->netdev;
 560        int i, err = 0;
 561
 562        for (i = 0; i < adapter->num_rx_queues; i++) {
 563                err = igc_setup_rx_resources(adapter->rx_ring[i]);
 564                if (err) {
 565                        netdev_err(dev, "Error on Rx queue %u setup\n", i);
 566                        for (i--; i >= 0; i--)
 567                                igc_free_rx_resources(adapter->rx_ring[i]);
 568                        break;
 569                }
 570        }
 571
 572        return err;
 573}
 574
 575static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter,
 576                                              struct igc_ring *ring)
 577{
 578        if (!igc_xdp_is_enabled(adapter) ||
 579            !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags))
 580                return NULL;
 581
 582        return xsk_get_pool_from_qid(ring->netdev, ring->queue_index);
 583}
 584
 585/**
 586 * igc_configure_rx_ring - Configure a receive ring after Reset
 587 * @adapter: board private structure
 588 * @ring: receive ring to be configured
 589 *
 590 * Configure the Rx unit of the MAC after a reset.
 591 */
 592static void igc_configure_rx_ring(struct igc_adapter *adapter,
 593                                  struct igc_ring *ring)
 594{
 595        struct igc_hw *hw = &adapter->hw;
 596        union igc_adv_rx_desc *rx_desc;
 597        int reg_idx = ring->reg_idx;
 598        u32 srrctl = 0, rxdctl = 0;
 599        u64 rdba = ring->dma;
 600        u32 buf_size;
 601
 602        xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
 603        ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
 604        if (ring->xsk_pool) {
 605                WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 606                                                   MEM_TYPE_XSK_BUFF_POOL,
 607                                                   NULL));
 608                xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
 609        } else {
 610                WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 611                                                   MEM_TYPE_PAGE_SHARED,
 612                                                   NULL));
 613        }
 614
 615        if (igc_xdp_is_enabled(adapter))
 616                set_ring_uses_large_buffer(ring);
 617
 618        /* disable the queue */
 619        wr32(IGC_RXDCTL(reg_idx), 0);
 620
 621        /* Set DMA base address registers */
 622        wr32(IGC_RDBAL(reg_idx),
 623             rdba & 0x00000000ffffffffULL);
 624        wr32(IGC_RDBAH(reg_idx), rdba >> 32);
 625        wr32(IGC_RDLEN(reg_idx),
 626             ring->count * sizeof(union igc_adv_rx_desc));
 627
 628        /* initialize head and tail */
 629        ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
 630        wr32(IGC_RDH(reg_idx), 0);
 631        writel(0, ring->tail);
 632
 633        /* reset next-to- use/clean to place SW in sync with hardware */
 634        ring->next_to_clean = 0;
 635        ring->next_to_use = 0;
 636
 637        if (ring->xsk_pool)
 638                buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
 639        else if (ring_uses_large_buffer(ring))
 640                buf_size = IGC_RXBUFFER_3072;
 641        else
 642                buf_size = IGC_RXBUFFER_2048;
 643
 644        srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
 645        srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
 646        srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
 647
 648        wr32(IGC_SRRCTL(reg_idx), srrctl);
 649
 650        rxdctl |= IGC_RX_PTHRESH;
 651        rxdctl |= IGC_RX_HTHRESH << 8;
 652        rxdctl |= IGC_RX_WTHRESH << 16;
 653
 654        /* initialize rx_buffer_info */
 655        memset(ring->rx_buffer_info, 0,
 656               sizeof(struct igc_rx_buffer) * ring->count);
 657
 658        /* initialize Rx descriptor 0 */
 659        rx_desc = IGC_RX_DESC(ring, 0);
 660        rx_desc->wb.upper.length = 0;
 661
 662        /* enable receive descriptor fetching */
 663        rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
 664
 665        wr32(IGC_RXDCTL(reg_idx), rxdctl);
 666}
 667
 668/**
 669 * igc_configure_rx - Configure receive Unit after Reset
 670 * @adapter: board private structure
 671 *
 672 * Configure the Rx unit of the MAC after a reset.
 673 */
 674static void igc_configure_rx(struct igc_adapter *adapter)
 675{
 676        int i;
 677
 678        /* Setup the HW Rx Head and Tail Descriptor Pointers and
 679         * the Base and Length of the Rx Descriptor Ring
 680         */
 681        for (i = 0; i < adapter->num_rx_queues; i++)
 682                igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
 683}
 684
 685/**
 686 * igc_configure_tx_ring - Configure transmit ring after Reset
 687 * @adapter: board private structure
 688 * @ring: tx ring to configure
 689 *
 690 * Configure a transmit ring after a reset.
 691 */
 692static void igc_configure_tx_ring(struct igc_adapter *adapter,
 693                                  struct igc_ring *ring)
 694{
 695        struct igc_hw *hw = &adapter->hw;
 696        int reg_idx = ring->reg_idx;
 697        u64 tdba = ring->dma;
 698        u32 txdctl = 0;
 699
 700        ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
 701
 702        /* disable the queue */
 703        wr32(IGC_TXDCTL(reg_idx), 0);
 704        wrfl();
 705        mdelay(10);
 706
 707        wr32(IGC_TDLEN(reg_idx),
 708             ring->count * sizeof(union igc_adv_tx_desc));
 709        wr32(IGC_TDBAL(reg_idx),
 710             tdba & 0x00000000ffffffffULL);
 711        wr32(IGC_TDBAH(reg_idx), tdba >> 32);
 712
 713        ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
 714        wr32(IGC_TDH(reg_idx), 0);
 715        writel(0, ring->tail);
 716
 717        txdctl |= IGC_TX_PTHRESH;
 718        txdctl |= IGC_TX_HTHRESH << 8;
 719        txdctl |= IGC_TX_WTHRESH << 16;
 720
 721        txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
 722        wr32(IGC_TXDCTL(reg_idx), txdctl);
 723}
 724
 725/**
 726 * igc_configure_tx - Configure transmit Unit after Reset
 727 * @adapter: board private structure
 728 *
 729 * Configure the Tx unit of the MAC after a reset.
 730 */
 731static void igc_configure_tx(struct igc_adapter *adapter)
 732{
 733        int i;
 734
 735        for (i = 0; i < adapter->num_tx_queues; i++)
 736                igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
 737}
 738
 739/**
 740 * igc_setup_mrqc - configure the multiple receive queue control registers
 741 * @adapter: Board private structure
 742 */
 743static void igc_setup_mrqc(struct igc_adapter *adapter)
 744{
 745        struct igc_hw *hw = &adapter->hw;
 746        u32 j, num_rx_queues;
 747        u32 mrqc, rxcsum;
 748        u32 rss_key[10];
 749
 750        netdev_rss_key_fill(rss_key, sizeof(rss_key));
 751        for (j = 0; j < 10; j++)
 752                wr32(IGC_RSSRK(j), rss_key[j]);
 753
 754        num_rx_queues = adapter->rss_queues;
 755
 756        if (adapter->rss_indir_tbl_init != num_rx_queues) {
 757                for (j = 0; j < IGC_RETA_SIZE; j++)
 758                        adapter->rss_indir_tbl[j] =
 759                        (j * num_rx_queues) / IGC_RETA_SIZE;
 760                adapter->rss_indir_tbl_init = num_rx_queues;
 761        }
 762        igc_write_rss_indir_tbl(adapter);
 763
 764        /* Disable raw packet checksumming so that RSS hash is placed in
 765         * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
 766         * offloads as they are enabled by default
 767         */
 768        rxcsum = rd32(IGC_RXCSUM);
 769        rxcsum |= IGC_RXCSUM_PCSD;
 770
 771        /* Enable Receive Checksum Offload for SCTP */
 772        rxcsum |= IGC_RXCSUM_CRCOFL;
 773
 774        /* Don't need to set TUOFL or IPOFL, they default to 1 */
 775        wr32(IGC_RXCSUM, rxcsum);
 776
 777        /* Generate RSS hash based on packet types, TCP/UDP
 778         * port numbers and/or IPv4/v6 src and dst addresses
 779         */
 780        mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
 781               IGC_MRQC_RSS_FIELD_IPV4_TCP |
 782               IGC_MRQC_RSS_FIELD_IPV6 |
 783               IGC_MRQC_RSS_FIELD_IPV6_TCP |
 784               IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
 785
 786        if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
 787                mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
 788        if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
 789                mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
 790
 791        mrqc |= IGC_MRQC_ENABLE_RSS_MQ;
 792
 793        wr32(IGC_MRQC, mrqc);
 794}
 795
 796/**
 797 * igc_setup_rctl - configure the receive control registers
 798 * @adapter: Board private structure
 799 */
 800static void igc_setup_rctl(struct igc_adapter *adapter)
 801{
 802        struct igc_hw *hw = &adapter->hw;
 803        u32 rctl;
 804
 805        rctl = rd32(IGC_RCTL);
 806
 807        rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
 808        rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
 809
 810        rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
 811                (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
 812
 813        /* enable stripping of CRC. Newer features require
 814         * that the HW strips the CRC.
 815         */
 816        rctl |= IGC_RCTL_SECRC;
 817
 818        /* disable store bad packets and clear size bits. */
 819        rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
 820
 821        /* enable LPE to allow for reception of jumbo frames */
 822        rctl |= IGC_RCTL_LPE;
 823
 824        /* disable queue 0 to prevent tail write w/o re-config */
 825        wr32(IGC_RXDCTL(0), 0);
 826
 827        /* This is useful for sniffing bad packets. */
 828        if (adapter->netdev->features & NETIF_F_RXALL) {
 829                /* UPE and MPE will be handled by normal PROMISC logic
 830                 * in set_rx_mode
 831                 */
 832                rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
 833                         IGC_RCTL_BAM | /* RX All Bcast Pkts */
 834                         IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
 835
 836                rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
 837                          IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
 838        }
 839
 840        wr32(IGC_RCTL, rctl);
 841}
 842
 843/**
 844 * igc_setup_tctl - configure the transmit control registers
 845 * @adapter: Board private structure
 846 */
 847static void igc_setup_tctl(struct igc_adapter *adapter)
 848{
 849        struct igc_hw *hw = &adapter->hw;
 850        u32 tctl;
 851
 852        /* disable queue 0 which icould be enabled by default */
 853        wr32(IGC_TXDCTL(0), 0);
 854
 855        /* Program the Transmit Control Register */
 856        tctl = rd32(IGC_TCTL);
 857        tctl &= ~IGC_TCTL_CT;
 858        tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
 859                (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
 860
 861        /* Enable transmits */
 862        tctl |= IGC_TCTL_EN;
 863
 864        wr32(IGC_TCTL, tctl);
 865}
 866
 867/**
 868 * igc_set_mac_filter_hw() - Set MAC address filter in hardware
 869 * @adapter: Pointer to adapter where the filter should be set
 870 * @index: Filter index
 871 * @type: MAC address filter type (source or destination)
 872 * @addr: MAC address
 873 * @queue: If non-negative, queue assignment feature is enabled and frames
 874 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
 875 *         assignment is disabled.
 876 */
 877static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
 878                                  enum igc_mac_filter_type type,
 879                                  const u8 *addr, int queue)
 880{
 881        struct net_device *dev = adapter->netdev;
 882        struct igc_hw *hw = &adapter->hw;
 883        u32 ral, rah;
 884
 885        if (WARN_ON(index >= hw->mac.rar_entry_count))
 886                return;
 887
 888        ral = le32_to_cpup((__le32 *)(addr));
 889        rah = le16_to_cpup((__le16 *)(addr + 4));
 890
 891        if (type == IGC_MAC_FILTER_TYPE_SRC) {
 892                rah &= ~IGC_RAH_ASEL_MASK;
 893                rah |= IGC_RAH_ASEL_SRC_ADDR;
 894        }
 895
 896        if (queue >= 0) {
 897                rah &= ~IGC_RAH_QSEL_MASK;
 898                rah |= (queue << IGC_RAH_QSEL_SHIFT);
 899                rah |= IGC_RAH_QSEL_ENABLE;
 900        }
 901
 902        rah |= IGC_RAH_AV;
 903
 904        wr32(IGC_RAL(index), ral);
 905        wr32(IGC_RAH(index), rah);
 906
 907        netdev_dbg(dev, "MAC address filter set in HW: index %d", index);
 908}
 909
 910/**
 911 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware
 912 * @adapter: Pointer to adapter where the filter should be cleared
 913 * @index: Filter index
 914 */
 915static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
 916{
 917        struct net_device *dev = adapter->netdev;
 918        struct igc_hw *hw = &adapter->hw;
 919
 920        if (WARN_ON(index >= hw->mac.rar_entry_count))
 921                return;
 922
 923        wr32(IGC_RAL(index), 0);
 924        wr32(IGC_RAH(index), 0);
 925
 926        netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index);
 927}
 928
 929/* Set default MAC address for the PF in the first RAR entry */
 930static void igc_set_default_mac_filter(struct igc_adapter *adapter)
 931{
 932        struct net_device *dev = adapter->netdev;
 933        u8 *addr = adapter->hw.mac.addr;
 934
 935        netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
 936
 937        igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1);
 938}
 939
 940/**
 941 * igc_set_mac - Change the Ethernet Address of the NIC
 942 * @netdev: network interface device structure
 943 * @p: pointer to an address structure
 944 *
 945 * Returns 0 on success, negative on failure
 946 */
 947static int igc_set_mac(struct net_device *netdev, void *p)
 948{
 949        struct igc_adapter *adapter = netdev_priv(netdev);
 950        struct igc_hw *hw = &adapter->hw;
 951        struct sockaddr *addr = p;
 952
 953        if (!is_valid_ether_addr(addr->sa_data))
 954                return -EADDRNOTAVAIL;
 955
 956        eth_hw_addr_set(netdev, addr->sa_data);
 957        memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
 958
 959        /* set the correct pool for the new PF MAC address in entry 0 */
 960        igc_set_default_mac_filter(adapter);
 961
 962        return 0;
 963}
 964
 965/**
 966 *  igc_write_mc_addr_list - write multicast addresses to MTA
 967 *  @netdev: network interface device structure
 968 *
 969 *  Writes multicast address list to the MTA hash table.
 970 *  Returns: -ENOMEM on failure
 971 *           0 on no addresses written
 972 *           X on writing X addresses to MTA
 973 **/
 974static int igc_write_mc_addr_list(struct net_device *netdev)
 975{
 976        struct igc_adapter *adapter = netdev_priv(netdev);
 977        struct igc_hw *hw = &adapter->hw;
 978        struct netdev_hw_addr *ha;
 979        u8  *mta_list;
 980        int i;
 981
 982        if (netdev_mc_empty(netdev)) {
 983                /* nothing to program, so clear mc list */
 984                igc_update_mc_addr_list(hw, NULL, 0);
 985                return 0;
 986        }
 987
 988        mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
 989        if (!mta_list)
 990                return -ENOMEM;
 991
 992        /* The shared function expects a packed array of only addresses. */
 993        i = 0;
 994        netdev_for_each_mc_addr(ha, netdev)
 995                memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
 996
 997        igc_update_mc_addr_list(hw, mta_list, i);
 998        kfree(mta_list);
 999
1000        return netdev_mc_count(netdev);
1001}
1002
1003static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime)
1004{
1005        ktime_t cycle_time = adapter->cycle_time;
1006        ktime_t base_time = adapter->base_time;
1007        u32 launchtime;
1008
1009        /* FIXME: when using ETF together with taprio, we may have a
1010         * case where 'delta' is larger than the cycle_time, this may
1011         * cause problems if we don't read the current value of
1012         * IGC_BASET, as the value writen into the launchtime
1013         * descriptor field may be misinterpreted.
1014         */
1015        div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime);
1016
1017        return cpu_to_le32(launchtime);
1018}
1019
1020static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
1021                            struct igc_tx_buffer *first,
1022                            u32 vlan_macip_lens, u32 type_tucmd,
1023                            u32 mss_l4len_idx)
1024{
1025        struct igc_adv_tx_context_desc *context_desc;
1026        u16 i = tx_ring->next_to_use;
1027
1028        context_desc = IGC_TX_CTXTDESC(tx_ring, i);
1029
1030        i++;
1031        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1032
1033        /* set bits to identify this as an advanced context descriptor */
1034        type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
1035
1036        /* For i225, context index must be unique per ring. */
1037        if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
1038                mss_l4len_idx |= tx_ring->reg_idx << 4;
1039
1040        context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
1041        context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
1042        context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
1043
1044        /* We assume there is always a valid Tx time available. Invalid times
1045         * should have been handled by the upper layers.
1046         */
1047        if (tx_ring->launchtime_enable) {
1048                struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1049                ktime_t txtime = first->skb->tstamp;
1050
1051                skb_txtime_consumed(first->skb);
1052                context_desc->launch_time = igc_tx_launchtime(adapter,
1053                                                              txtime);
1054        } else {
1055                context_desc->launch_time = 0;
1056        }
1057}
1058
1059static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
1060{
1061        struct sk_buff *skb = first->skb;
1062        u32 vlan_macip_lens = 0;
1063        u32 type_tucmd = 0;
1064
1065        if (skb->ip_summed != CHECKSUM_PARTIAL) {
1066csum_failed:
1067                if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
1068                    !tx_ring->launchtime_enable)
1069                        return;
1070                goto no_csum;
1071        }
1072
1073        switch (skb->csum_offset) {
1074        case offsetof(struct tcphdr, check):
1075                type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1076                fallthrough;
1077        case offsetof(struct udphdr, check):
1078                break;
1079        case offsetof(struct sctphdr, checksum):
1080                /* validate that this is actually an SCTP request */
1081                if (skb_csum_is_sctp(skb)) {
1082                        type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
1083                        break;
1084                }
1085                fallthrough;
1086        default:
1087                skb_checksum_help(skb);
1088                goto csum_failed;
1089        }
1090
1091        /* update TX checksum flag */
1092        first->tx_flags |= IGC_TX_FLAGS_CSUM;
1093        vlan_macip_lens = skb_checksum_start_offset(skb) -
1094                          skb_network_offset(skb);
1095no_csum:
1096        vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
1097        vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1098
1099        igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
1100}
1101
1102static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1103{
1104        struct net_device *netdev = tx_ring->netdev;
1105
1106        netif_stop_subqueue(netdev, tx_ring->queue_index);
1107
1108        /* memory barriier comment */
1109        smp_mb();
1110
1111        /* We need to check again in a case another CPU has just
1112         * made room available.
1113         */
1114        if (igc_desc_unused(tx_ring) < size)
1115                return -EBUSY;
1116
1117        /* A reprieve! */
1118        netif_wake_subqueue(netdev, tx_ring->queue_index);
1119
1120        u64_stats_update_begin(&tx_ring->tx_syncp2);
1121        tx_ring->tx_stats.restart_queue2++;
1122        u64_stats_update_end(&tx_ring->tx_syncp2);
1123
1124        return 0;
1125}
1126
1127static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1128{
1129        if (igc_desc_unused(tx_ring) >= size)
1130                return 0;
1131        return __igc_maybe_stop_tx(tx_ring, size);
1132}
1133
1134#define IGC_SET_FLAG(_input, _flag, _result) \
1135        (((_flag) <= (_result)) ?                               \
1136         ((u32)((_input) & (_flag)) * ((_result) / (_flag))) :  \
1137         ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
1138
1139static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
1140{
1141        /* set type for advanced descriptor with frame checksum insertion */
1142        u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
1143                       IGC_ADVTXD_DCMD_DEXT |
1144                       IGC_ADVTXD_DCMD_IFCS;
1145
1146        /* set HW vlan bit if vlan is present */
1147        cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN,
1148                                 IGC_ADVTXD_DCMD_VLE);
1149
1150        /* set segmentation bits for TSO */
1151        cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO,
1152                                 (IGC_ADVTXD_DCMD_TSE));
1153
1154        /* set timestamp bit if present */
1155        cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
1156                                 (IGC_ADVTXD_MAC_TSTAMP));
1157
1158        /* insert frame checksum */
1159        cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS);
1160
1161        return cmd_type;
1162}
1163
1164static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
1165                                 union igc_adv_tx_desc *tx_desc,
1166                                 u32 tx_flags, unsigned int paylen)
1167{
1168        u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
1169
1170        /* insert L4 checksum */
1171        olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
1172                          ((IGC_TXD_POPTS_TXSM << 8) /
1173                          IGC_TX_FLAGS_CSUM);
1174
1175        /* insert IPv4 checksum */
1176        olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
1177                          (((IGC_TXD_POPTS_IXSM << 8)) /
1178                          IGC_TX_FLAGS_IPV4);
1179
1180        tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
1181}
1182
1183static int igc_tx_map(struct igc_ring *tx_ring,
1184                      struct igc_tx_buffer *first,
1185                      const u8 hdr_len)
1186{
1187        struct sk_buff *skb = first->skb;
1188        struct igc_tx_buffer *tx_buffer;
1189        union igc_adv_tx_desc *tx_desc;
1190        u32 tx_flags = first->tx_flags;
1191        skb_frag_t *frag;
1192        u16 i = tx_ring->next_to_use;
1193        unsigned int data_len, size;
1194        dma_addr_t dma;
1195        u32 cmd_type;
1196
1197        cmd_type = igc_tx_cmd_type(skb, tx_flags);
1198        tx_desc = IGC_TX_DESC(tx_ring, i);
1199
1200        igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
1201
1202        size = skb_headlen(skb);
1203        data_len = skb->data_len;
1204
1205        dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
1206
1207        tx_buffer = first;
1208
1209        for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
1210                if (dma_mapping_error(tx_ring->dev, dma))
1211                        goto dma_error;
1212
1213                /* record length, and DMA address */
1214                dma_unmap_len_set(tx_buffer, len, size);
1215                dma_unmap_addr_set(tx_buffer, dma, dma);
1216
1217                tx_desc->read.buffer_addr = cpu_to_le64(dma);
1218
1219                while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
1220                        tx_desc->read.cmd_type_len =
1221                                cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
1222
1223                        i++;
1224                        tx_desc++;
1225                        if (i == tx_ring->count) {
1226                                tx_desc = IGC_TX_DESC(tx_ring, 0);
1227                                i = 0;
1228                        }
1229                        tx_desc->read.olinfo_status = 0;
1230
1231                        dma += IGC_MAX_DATA_PER_TXD;
1232                        size -= IGC_MAX_DATA_PER_TXD;
1233
1234                        tx_desc->read.buffer_addr = cpu_to_le64(dma);
1235                }
1236
1237                if (likely(!data_len))
1238                        break;
1239
1240                tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
1241
1242                i++;
1243                tx_desc++;
1244                if (i == tx_ring->count) {
1245                        tx_desc = IGC_TX_DESC(tx_ring, 0);
1246                        i = 0;
1247                }
1248                tx_desc->read.olinfo_status = 0;
1249
1250                size = skb_frag_size(frag);
1251                data_len -= size;
1252
1253                dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
1254                                       size, DMA_TO_DEVICE);
1255
1256                tx_buffer = &tx_ring->tx_buffer_info[i];
1257        }
1258
1259        /* write last descriptor with RS and EOP bits */
1260        cmd_type |= size | IGC_TXD_DCMD;
1261        tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
1262
1263        netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
1264
1265        /* set the timestamp */
1266        first->time_stamp = jiffies;
1267
1268        skb_tx_timestamp(skb);
1269
1270        /* Force memory writes to complete before letting h/w know there
1271         * are new descriptors to fetch.  (Only applicable for weak-ordered
1272         * memory model archs, such as IA-64).
1273         *
1274         * We also need this memory barrier to make certain all of the
1275         * status bits have been updated before next_to_watch is written.
1276         */
1277        wmb();
1278
1279        /* set next_to_watch value indicating a packet is present */
1280        first->next_to_watch = tx_desc;
1281
1282        i++;
1283        if (i == tx_ring->count)
1284                i = 0;
1285
1286        tx_ring->next_to_use = i;
1287
1288        /* Make sure there is space in the ring for the next send. */
1289        igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
1290
1291        if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
1292                writel(i, tx_ring->tail);
1293        }
1294
1295        return 0;
1296dma_error:
1297        netdev_err(tx_ring->netdev, "TX DMA map failed\n");
1298        tx_buffer = &tx_ring->tx_buffer_info[i];
1299
1300        /* clear dma mappings for failed tx_buffer_info map */
1301        while (tx_buffer != first) {
1302                if (dma_unmap_len(tx_buffer, len))
1303                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1304
1305                if (i-- == 0)
1306                        i += tx_ring->count;
1307                tx_buffer = &tx_ring->tx_buffer_info[i];
1308        }
1309
1310        if (dma_unmap_len(tx_buffer, len))
1311                igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1312
1313        dev_kfree_skb_any(tx_buffer->skb);
1314        tx_buffer->skb = NULL;
1315
1316        tx_ring->next_to_use = i;
1317
1318        return -1;
1319}
1320
1321static int igc_tso(struct igc_ring *tx_ring,
1322                   struct igc_tx_buffer *first,
1323                   u8 *hdr_len)
1324{
1325        u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
1326        struct sk_buff *skb = first->skb;
1327        union {
1328                struct iphdr *v4;
1329                struct ipv6hdr *v6;
1330                unsigned char *hdr;
1331        } ip;
1332        union {
1333                struct tcphdr *tcp;
1334                struct udphdr *udp;
1335                unsigned char *hdr;
1336        } l4;
1337        u32 paylen, l4_offset;
1338        int err;
1339
1340        if (skb->ip_summed != CHECKSUM_PARTIAL)
1341                return 0;
1342
1343        if (!skb_is_gso(skb))
1344                return 0;
1345
1346        err = skb_cow_head(skb, 0);
1347        if (err < 0)
1348                return err;
1349
1350        ip.hdr = skb_network_header(skb);
1351        l4.hdr = skb_checksum_start(skb);
1352
1353        /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
1354        type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1355
1356        /* initialize outer IP header fields */
1357        if (ip.v4->version == 4) {
1358                unsigned char *csum_start = skb_checksum_start(skb);
1359                unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
1360
1361                /* IP header will have to cancel out any data that
1362                 * is not a part of the outer IP header
1363                 */
1364                ip.v4->check = csum_fold(csum_partial(trans_start,
1365                                                      csum_start - trans_start,
1366                                                      0));
1367                type_tucmd |= IGC_ADVTXD_TUCMD_IPV4;
1368
1369                ip.v4->tot_len = 0;
1370                first->tx_flags |= IGC_TX_FLAGS_TSO |
1371                                   IGC_TX_FLAGS_CSUM |
1372                                   IGC_TX_FLAGS_IPV4;
1373        } else {
1374                ip.v6->payload_len = 0;
1375                first->tx_flags |= IGC_TX_FLAGS_TSO |
1376                                   IGC_TX_FLAGS_CSUM;
1377        }
1378
1379        /* determine offset of inner transport header */
1380        l4_offset = l4.hdr - skb->data;
1381
1382        /* remove payload length from inner checksum */
1383        paylen = skb->len - l4_offset;
1384        if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) {
1385                /* compute length of segmentation header */
1386                *hdr_len = (l4.tcp->doff * 4) + l4_offset;
1387                csum_replace_by_diff(&l4.tcp->check,
1388                                     (__force __wsum)htonl(paylen));
1389        } else {
1390                /* compute length of segmentation header */
1391                *hdr_len = sizeof(*l4.udp) + l4_offset;
1392                csum_replace_by_diff(&l4.udp->check,
1393                                     (__force __wsum)htonl(paylen));
1394        }
1395
1396        /* update gso size and bytecount with header size */
1397        first->gso_segs = skb_shinfo(skb)->gso_segs;
1398        first->bytecount += (first->gso_segs - 1) * *hdr_len;
1399
1400        /* MSS L4LEN IDX */
1401        mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT;
1402        mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT;
1403
1404        /* VLAN MACLEN IPLEN */
1405        vlan_macip_lens = l4.hdr - ip.hdr;
1406        vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
1407        vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1408
1409        igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
1410                        type_tucmd, mss_l4len_idx);
1411
1412        return 1;
1413}
1414
1415static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
1416                                       struct igc_ring *tx_ring)
1417{
1418        u16 count = TXD_USE_COUNT(skb_headlen(skb));
1419        __be16 protocol = vlan_get_protocol(skb);
1420        struct igc_tx_buffer *first;
1421        u32 tx_flags = 0;
1422        unsigned short f;
1423        u8 hdr_len = 0;
1424        int tso = 0;
1425
1426        /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
1427         *      + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
1428         *      + 2 desc gap to keep tail from touching head,
1429         *      + 1 desc for context descriptor,
1430         * otherwise try next time
1431         */
1432        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
1433                count += TXD_USE_COUNT(skb_frag_size(
1434                                                &skb_shinfo(skb)->frags[f]));
1435
1436        if (igc_maybe_stop_tx(tx_ring, count + 3)) {
1437                /* this is a hard error */
1438                return NETDEV_TX_BUSY;
1439        }
1440
1441        /* record the location of the first descriptor for this packet */
1442        first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
1443        first->type = IGC_TX_BUFFER_TYPE_SKB;
1444        first->skb = skb;
1445        first->bytecount = skb->len;
1446        first->gso_segs = 1;
1447
1448        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
1449                struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1450
1451                /* FIXME: add support for retrieving timestamps from
1452                 * the other timer registers before skipping the
1453                 * timestamping request.
1454                 */
1455                if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
1456                    !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
1457                                           &adapter->state)) {
1458                        skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1459                        tx_flags |= IGC_TX_FLAGS_TSTAMP;
1460
1461                        adapter->ptp_tx_skb = skb_get(skb);
1462                        adapter->ptp_tx_start = jiffies;
1463                } else {
1464                        adapter->tx_hwtstamp_skipped++;
1465                }
1466        }
1467
1468        if (skb_vlan_tag_present(skb)) {
1469                tx_flags |= IGC_TX_FLAGS_VLAN;
1470                tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT);
1471        }
1472
1473        /* record initial flags and protocol */
1474        first->tx_flags = tx_flags;
1475        first->protocol = protocol;
1476
1477        tso = igc_tso(tx_ring, first, &hdr_len);
1478        if (tso < 0)
1479                goto out_drop;
1480        else if (!tso)
1481                igc_tx_csum(tx_ring, first);
1482
1483        igc_tx_map(tx_ring, first, hdr_len);
1484
1485        return NETDEV_TX_OK;
1486
1487out_drop:
1488        dev_kfree_skb_any(first->skb);
1489        first->skb = NULL;
1490
1491        return NETDEV_TX_OK;
1492}
1493
1494static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
1495                                                    struct sk_buff *skb)
1496{
1497        unsigned int r_idx = skb->queue_mapping;
1498
1499        if (r_idx >= adapter->num_tx_queues)
1500                r_idx = r_idx % adapter->num_tx_queues;
1501
1502        return adapter->tx_ring[r_idx];
1503}
1504
1505static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
1506                                  struct net_device *netdev)
1507{
1508        struct igc_adapter *adapter = netdev_priv(netdev);
1509
1510        /* The minimum packet size with TCTL.PSP set is 17 so pad the skb
1511         * in order to meet this minimum size requirement.
1512         */
1513        if (skb->len < 17) {
1514                if (skb_padto(skb, 17))
1515                        return NETDEV_TX_OK;
1516                skb->len = 17;
1517        }
1518
1519        return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
1520}
1521
1522static void igc_rx_checksum(struct igc_ring *ring,
1523                            union igc_adv_rx_desc *rx_desc,
1524                            struct sk_buff *skb)
1525{
1526        skb_checksum_none_assert(skb);
1527
1528        /* Ignore Checksum bit is set */
1529        if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
1530                return;
1531
1532        /* Rx checksum disabled via ethtool */
1533        if (!(ring->netdev->features & NETIF_F_RXCSUM))
1534                return;
1535
1536        /* TCP/UDP checksum error bit is set */
1537        if (igc_test_staterr(rx_desc,
1538                             IGC_RXDEXT_STATERR_L4E |
1539                             IGC_RXDEXT_STATERR_IPE)) {
1540                /* work around errata with sctp packets where the TCPE aka
1541                 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
1542                 * packets (aka let the stack check the crc32c)
1543                 */
1544                if (!(skb->len == 60 &&
1545                      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
1546                        u64_stats_update_begin(&ring->rx_syncp);
1547                        ring->rx_stats.csum_err++;
1548                        u64_stats_update_end(&ring->rx_syncp);
1549                }
1550                /* let the stack verify checksum errors */
1551                return;
1552        }
1553        /* It must be a TCP or UDP packet with a valid checksum */
1554        if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
1555                                      IGC_RXD_STAT_UDPCS))
1556                skb->ip_summed = CHECKSUM_UNNECESSARY;
1557
1558        netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
1559                   le32_to_cpu(rx_desc->wb.upper.status_error));
1560}
1561
1562static inline void igc_rx_hash(struct igc_ring *ring,
1563                               union igc_adv_rx_desc *rx_desc,
1564                               struct sk_buff *skb)
1565{
1566        if (ring->netdev->features & NETIF_F_RXHASH)
1567                skb_set_hash(skb,
1568                             le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
1569                             PKT_HASH_TYPE_L3);
1570}
1571
1572static void igc_rx_vlan(struct igc_ring *rx_ring,
1573                        union igc_adv_rx_desc *rx_desc,
1574                        struct sk_buff *skb)
1575{
1576        struct net_device *dev = rx_ring->netdev;
1577        u16 vid;
1578
1579        if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
1580            igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
1581                if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
1582                    test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
1583                        vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
1584                else
1585                        vid = le16_to_cpu(rx_desc->wb.upper.vlan);
1586
1587                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
1588        }
1589}
1590
1591/**
1592 * igc_process_skb_fields - Populate skb header fields from Rx descriptor
1593 * @rx_ring: rx descriptor ring packet is being transacted on
1594 * @rx_desc: pointer to the EOP Rx descriptor
1595 * @skb: pointer to current skb being populated
1596 *
1597 * This function checks the ring, descriptor, and packet information in order
1598 * to populate the hash, checksum, VLAN, protocol, and other fields within the
1599 * skb.
1600 */
1601static void igc_process_skb_fields(struct igc_ring *rx_ring,
1602                                   union igc_adv_rx_desc *rx_desc,
1603                                   struct sk_buff *skb)
1604{
1605        igc_rx_hash(rx_ring, rx_desc, skb);
1606
1607        igc_rx_checksum(rx_ring, rx_desc, skb);
1608
1609        igc_rx_vlan(rx_ring, rx_desc, skb);
1610
1611        skb_record_rx_queue(skb, rx_ring->queue_index);
1612
1613        skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1614}
1615
1616static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features)
1617{
1618        bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
1619        struct igc_adapter *adapter = netdev_priv(netdev);
1620        struct igc_hw *hw = &adapter->hw;
1621        u32 ctrl;
1622
1623        ctrl = rd32(IGC_CTRL);
1624
1625        if (enable) {
1626                /* enable VLAN tag insert/strip */
1627                ctrl |= IGC_CTRL_VME;
1628        } else {
1629                /* disable VLAN tag insert/strip */
1630                ctrl &= ~IGC_CTRL_VME;
1631        }
1632        wr32(IGC_CTRL, ctrl);
1633}
1634
1635static void igc_restore_vlan(struct igc_adapter *adapter)
1636{
1637        igc_vlan_mode(adapter->netdev, adapter->netdev->features);
1638}
1639
1640static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
1641                                               const unsigned int size,
1642                                               int *rx_buffer_pgcnt)
1643{
1644        struct igc_rx_buffer *rx_buffer;
1645
1646        rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
1647        *rx_buffer_pgcnt =
1648#if (PAGE_SIZE < 8192)
1649                page_count(rx_buffer->page);
1650#else
1651                0;
1652#endif
1653        prefetchw(rx_buffer->page);
1654
1655        /* we are reusing so sync this buffer for CPU use */
1656        dma_sync_single_range_for_cpu(rx_ring->dev,
1657                                      rx_buffer->dma,
1658                                      rx_buffer->page_offset,
1659                                      size,
1660                                      DMA_FROM_DEVICE);
1661
1662        rx_buffer->pagecnt_bias--;
1663
1664        return rx_buffer;
1665}
1666
1667static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
1668                               unsigned int truesize)
1669{
1670#if (PAGE_SIZE < 8192)
1671        buffer->page_offset ^= truesize;
1672#else
1673        buffer->page_offset += truesize;
1674#endif
1675}
1676
1677static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring,
1678                                              unsigned int size)
1679{
1680        unsigned int truesize;
1681
1682#if (PAGE_SIZE < 8192)
1683        truesize = igc_rx_pg_size(ring) / 2;
1684#else
1685        truesize = ring_uses_build_skb(ring) ?
1686                   SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
1687                   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1688                   SKB_DATA_ALIGN(size);
1689#endif
1690        return truesize;
1691}
1692
1693/**
1694 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
1695 * @rx_ring: rx descriptor ring to transact packets on
1696 * @rx_buffer: buffer containing page to add
1697 * @skb: sk_buff to place the data into
1698 * @size: size of buffer to be added
1699 *
1700 * This function will add the data contained in rx_buffer->page to the skb.
1701 */
1702static void igc_add_rx_frag(struct igc_ring *rx_ring,
1703                            struct igc_rx_buffer *rx_buffer,
1704                            struct sk_buff *skb,
1705                            unsigned int size)
1706{
1707        unsigned int truesize;
1708
1709#if (PAGE_SIZE < 8192)
1710        truesize = igc_rx_pg_size(rx_ring) / 2;
1711#else
1712        truesize = ring_uses_build_skb(rx_ring) ?
1713                   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1714                   SKB_DATA_ALIGN(size);
1715#endif
1716        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
1717                        rx_buffer->page_offset, size, truesize);
1718
1719        igc_rx_buffer_flip(rx_buffer, truesize);
1720}
1721
1722static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
1723                                     struct igc_rx_buffer *rx_buffer,
1724                                     struct xdp_buff *xdp)
1725{
1726        unsigned int size = xdp->data_end - xdp->data;
1727        unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1728        unsigned int metasize = xdp->data - xdp->data_meta;
1729        struct sk_buff *skb;
1730
1731        /* prefetch first cache line of first page */
1732        net_prefetch(xdp->data_meta);
1733
1734        /* build an skb around the page buffer */
1735        skb = napi_build_skb(xdp->data_hard_start, truesize);
1736        if (unlikely(!skb))
1737                return NULL;
1738
1739        /* update pointers within the skb to store the data */
1740        skb_reserve(skb, xdp->data - xdp->data_hard_start);
1741        __skb_put(skb, size);
1742        if (metasize)
1743                skb_metadata_set(skb, metasize);
1744
1745        igc_rx_buffer_flip(rx_buffer, truesize);
1746        return skb;
1747}
1748
1749static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
1750                                         struct igc_rx_buffer *rx_buffer,
1751                                         struct xdp_buff *xdp,
1752                                         ktime_t timestamp)
1753{
1754        unsigned int metasize = xdp->data - xdp->data_meta;
1755        unsigned int size = xdp->data_end - xdp->data;
1756        unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1757        void *va = xdp->data;
1758        unsigned int headlen;
1759        struct sk_buff *skb;
1760
1761        /* prefetch first cache line of first page */
1762        net_prefetch(xdp->data_meta);
1763
1764        /* allocate a skb to store the frags */
1765        skb = napi_alloc_skb(&rx_ring->q_vector->napi,
1766                             IGC_RX_HDR_LEN + metasize);
1767        if (unlikely(!skb))
1768                return NULL;
1769
1770        if (timestamp)
1771                skb_hwtstamps(skb)->hwtstamp = timestamp;
1772
1773        /* Determine available headroom for copy */
1774        headlen = size;
1775        if (headlen > IGC_RX_HDR_LEN)
1776                headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
1777
1778        /* align pull length to size of long to optimize memcpy performance */
1779        memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
1780               ALIGN(headlen + metasize, sizeof(long)));
1781
1782        if (metasize) {
1783                skb_metadata_set(skb, metasize);
1784                __skb_pull(skb, metasize);
1785        }
1786
1787        /* update all of the pointers */
1788        size -= headlen;
1789        if (size) {
1790                skb_add_rx_frag(skb, 0, rx_buffer->page,
1791                                (va + headlen) - page_address(rx_buffer->page),
1792                                size, truesize);
1793                igc_rx_buffer_flip(rx_buffer, truesize);
1794        } else {
1795                rx_buffer->pagecnt_bias++;
1796        }
1797
1798        return skb;
1799}
1800
1801/**
1802 * igc_reuse_rx_page - page flip buffer and store it back on the ring
1803 * @rx_ring: rx descriptor ring to store buffers on
1804 * @old_buff: donor buffer to have page reused
1805 *
1806 * Synchronizes page for reuse by the adapter
1807 */
1808static void igc_reuse_rx_page(struct igc_ring *rx_ring,
1809                              struct igc_rx_buffer *old_buff)
1810{
1811        u16 nta = rx_ring->next_to_alloc;
1812        struct igc_rx_buffer *new_buff;
1813
1814        new_buff = &rx_ring->rx_buffer_info[nta];
1815
1816        /* update, and store next to alloc */
1817        nta++;
1818        rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
1819
1820        /* Transfer page from old buffer to new buffer.
1821         * Move each member individually to avoid possible store
1822         * forwarding stalls.
1823         */
1824        new_buff->dma           = old_buff->dma;
1825        new_buff->page          = old_buff->page;
1826        new_buff->page_offset   = old_buff->page_offset;
1827        new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
1828}
1829
1830static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer,
1831                                  int rx_buffer_pgcnt)
1832{
1833        unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
1834        struct page *page = rx_buffer->page;
1835
1836        /* avoid re-using remote and pfmemalloc pages */
1837        if (!dev_page_is_reusable(page))
1838                return false;
1839
1840#if (PAGE_SIZE < 8192)
1841        /* if we are only owner of page we can reuse it */
1842        if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
1843                return false;
1844#else
1845#define IGC_LAST_OFFSET \
1846        (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
1847
1848        if (rx_buffer->page_offset > IGC_LAST_OFFSET)
1849                return false;
1850#endif
1851
1852        /* If we have drained the page fragment pool we need to update
1853         * the pagecnt_bias and page count so that we fully restock the
1854         * number of references the driver holds.
1855         */
1856        if (unlikely(pagecnt_bias == 1)) {
1857                page_ref_add(page, USHRT_MAX - 1);
1858                rx_buffer->pagecnt_bias = USHRT_MAX;
1859        }
1860
1861        return true;
1862}
1863
1864/**
1865 * igc_is_non_eop - process handling of non-EOP buffers
1866 * @rx_ring: Rx ring being processed
1867 * @rx_desc: Rx descriptor for current buffer
1868 *
1869 * This function updates next to clean.  If the buffer is an EOP buffer
1870 * this function exits returning false, otherwise it will place the
1871 * sk_buff in the next buffer to be chained and return true indicating
1872 * that this is in fact a non-EOP buffer.
1873 */
1874static bool igc_is_non_eop(struct igc_ring *rx_ring,
1875                           union igc_adv_rx_desc *rx_desc)
1876{
1877        u32 ntc = rx_ring->next_to_clean + 1;
1878
1879        /* fetch, update, and store next to clean */
1880        ntc = (ntc < rx_ring->count) ? ntc : 0;
1881        rx_ring->next_to_clean = ntc;
1882
1883        prefetch(IGC_RX_DESC(rx_ring, ntc));
1884
1885        if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
1886                return false;
1887
1888        return true;
1889}
1890
1891/**
1892 * igc_cleanup_headers - Correct corrupted or empty headers
1893 * @rx_ring: rx descriptor ring packet is being transacted on
1894 * @rx_desc: pointer to the EOP Rx descriptor
1895 * @skb: pointer to current skb being fixed
1896 *
1897 * Address the case where we are pulling data in on pages only
1898 * and as such no data is present in the skb header.
1899 *
1900 * In addition if skb is not at least 60 bytes we need to pad it so that
1901 * it is large enough to qualify as a valid Ethernet frame.
1902 *
1903 * Returns true if an error was encountered and skb was freed.
1904 */
1905static bool igc_cleanup_headers(struct igc_ring *rx_ring,
1906                                union igc_adv_rx_desc *rx_desc,
1907                                struct sk_buff *skb)
1908{
1909        /* XDP packets use error pointer so abort at this point */
1910        if (IS_ERR(skb))
1911                return true;
1912
1913        if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
1914                struct net_device *netdev = rx_ring->netdev;
1915
1916                if (!(netdev->features & NETIF_F_RXALL)) {
1917                        dev_kfree_skb_any(skb);
1918                        return true;
1919                }
1920        }
1921
1922        /* if eth_skb_pad returns an error the skb was freed */
1923        if (eth_skb_pad(skb))
1924                return true;
1925
1926        return false;
1927}
1928
1929static void igc_put_rx_buffer(struct igc_ring *rx_ring,
1930                              struct igc_rx_buffer *rx_buffer,
1931                              int rx_buffer_pgcnt)
1932{
1933        if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
1934                /* hand second half of page back to the ring */
1935                igc_reuse_rx_page(rx_ring, rx_buffer);
1936        } else {
1937                /* We are not reusing the buffer so unmap it and free
1938                 * any references we are holding to it
1939                 */
1940                dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
1941                                     igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
1942                                     IGC_RX_DMA_ATTR);
1943                __page_frag_cache_drain(rx_buffer->page,
1944                                        rx_buffer->pagecnt_bias);
1945        }
1946
1947        /* clear contents of rx_buffer */
1948        rx_buffer->page = NULL;
1949}
1950
1951static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
1952{
1953        struct igc_adapter *adapter = rx_ring->q_vector->adapter;
1954
1955        if (ring_uses_build_skb(rx_ring))
1956                return IGC_SKB_PAD;
1957        if (igc_xdp_is_enabled(adapter))
1958                return XDP_PACKET_HEADROOM;
1959
1960        return 0;
1961}
1962
1963static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
1964                                  struct igc_rx_buffer *bi)
1965{
1966        struct page *page = bi->page;
1967        dma_addr_t dma;
1968
1969        /* since we are recycling buffers we should seldom need to alloc */
1970        if (likely(page))
1971                return true;
1972
1973        /* alloc new page for storage */
1974        page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
1975        if (unlikely(!page)) {
1976                rx_ring->rx_stats.alloc_failed++;
1977                return false;
1978        }
1979
1980        /* map page for use */
1981        dma = dma_map_page_attrs(rx_ring->dev, page, 0,
1982                                 igc_rx_pg_size(rx_ring),
1983                                 DMA_FROM_DEVICE,
1984                                 IGC_RX_DMA_ATTR);
1985
1986        /* if mapping failed free memory back to system since
1987         * there isn't much point in holding memory we can't use
1988         */
1989        if (dma_mapping_error(rx_ring->dev, dma)) {
1990                __free_page(page);
1991
1992                rx_ring->rx_stats.alloc_failed++;
1993                return false;
1994        }
1995
1996        bi->dma = dma;
1997        bi->page = page;
1998        bi->page_offset = igc_rx_offset(rx_ring);
1999        page_ref_add(page, USHRT_MAX - 1);
2000        bi->pagecnt_bias = USHRT_MAX;
2001
2002        return true;
2003}
2004
2005/**
2006 * igc_alloc_rx_buffers - Replace used receive buffers; packet split
2007 * @rx_ring: rx descriptor ring
2008 * @cleaned_count: number of buffers to clean
2009 */
2010static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
2011{
2012        union igc_adv_rx_desc *rx_desc;
2013        u16 i = rx_ring->next_to_use;
2014        struct igc_rx_buffer *bi;
2015        u16 bufsz;
2016
2017        /* nothing to do */
2018        if (!cleaned_count)
2019                return;
2020
2021        rx_desc = IGC_RX_DESC(rx_ring, i);
2022        bi = &rx_ring->rx_buffer_info[i];
2023        i -= rx_ring->count;
2024
2025        bufsz = igc_rx_bufsz(rx_ring);
2026
2027        do {
2028                if (!igc_alloc_mapped_page(rx_ring, bi))
2029                        break;
2030
2031                /* sync the buffer for use by the device */
2032                dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
2033                                                 bi->page_offset, bufsz,
2034                                                 DMA_FROM_DEVICE);
2035
2036                /* Refresh the desc even if buffer_addrs didn't change
2037                 * because each write-back erases this info.
2038                 */
2039                rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
2040
2041                rx_desc++;
2042                bi++;
2043                i++;
2044                if (unlikely(!i)) {
2045                        rx_desc = IGC_RX_DESC(rx_ring, 0);
2046                        bi = rx_ring->rx_buffer_info;
2047                        i -= rx_ring->count;
2048                }
2049
2050                /* clear the length for the next_to_use descriptor */
2051                rx_desc->wb.upper.length = 0;
2052
2053                cleaned_count--;
2054        } while (cleaned_count);
2055
2056        i += rx_ring->count;
2057
2058        if (rx_ring->next_to_use != i) {
2059                /* record the next descriptor to use */
2060                rx_ring->next_to_use = i;
2061
2062                /* update next to alloc since we have filled the ring */
2063                rx_ring->next_to_alloc = i;
2064
2065                /* Force memory writes to complete before letting h/w
2066                 * know there are new descriptors to fetch.  (Only
2067                 * applicable for weak-ordered memory model archs,
2068                 * such as IA-64).
2069                 */
2070                wmb();
2071                writel(i, rx_ring->tail);
2072        }
2073}
2074
2075static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
2076{
2077        union igc_adv_rx_desc *desc;
2078        u16 i = ring->next_to_use;
2079        struct igc_rx_buffer *bi;
2080        dma_addr_t dma;
2081        bool ok = true;
2082
2083        if (!count)
2084                return ok;
2085
2086        desc = IGC_RX_DESC(ring, i);
2087        bi = &ring->rx_buffer_info[i];
2088        i -= ring->count;
2089
2090        do {
2091                bi->xdp = xsk_buff_alloc(ring->xsk_pool);
2092                if (!bi->xdp) {
2093                        ok = false;
2094                        break;
2095                }
2096
2097                dma = xsk_buff_xdp_get_dma(bi->xdp);
2098                desc->read.pkt_addr = cpu_to_le64(dma);
2099
2100                desc++;
2101                bi++;
2102                i++;
2103                if (unlikely(!i)) {
2104                        desc = IGC_RX_DESC(ring, 0);
2105                        bi = ring->rx_buffer_info;
2106                        i -= ring->count;
2107                }
2108
2109                /* Clear the length for the next_to_use descriptor. */
2110                desc->wb.upper.length = 0;
2111
2112                count--;
2113        } while (count);
2114
2115        i += ring->count;
2116
2117        if (ring->next_to_use != i) {
2118                ring->next_to_use = i;
2119
2120                /* Force memory writes to complete before letting h/w
2121                 * know there are new descriptors to fetch.  (Only
2122                 * applicable for weak-ordered memory model archs,
2123                 * such as IA-64).
2124                 */
2125                wmb();
2126                writel(i, ring->tail);
2127        }
2128
2129        return ok;
2130}
2131
2132static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
2133                                  struct xdp_frame *xdpf,
2134                                  struct igc_ring *ring)
2135{
2136        dma_addr_t dma;
2137
2138        dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
2139        if (dma_mapping_error(ring->dev, dma)) {
2140                netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
2141                return -ENOMEM;
2142        }
2143
2144        buffer->type = IGC_TX_BUFFER_TYPE_XDP;
2145        buffer->xdpf = xdpf;
2146        buffer->protocol = 0;
2147        buffer->bytecount = xdpf->len;
2148        buffer->gso_segs = 1;
2149        buffer->time_stamp = jiffies;
2150        dma_unmap_len_set(buffer, len, xdpf->len);
2151        dma_unmap_addr_set(buffer, dma, dma);
2152        return 0;
2153}
2154
2155/* This function requires __netif_tx_lock is held by the caller. */
2156static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
2157                                      struct xdp_frame *xdpf)
2158{
2159        struct igc_tx_buffer *buffer;
2160        union igc_adv_tx_desc *desc;
2161        u32 cmd_type, olinfo_status;
2162        int err;
2163
2164        if (!igc_desc_unused(ring))
2165                return -EBUSY;
2166
2167        buffer = &ring->tx_buffer_info[ring->next_to_use];
2168        err = igc_xdp_init_tx_buffer(buffer, xdpf, ring);
2169        if (err)
2170                return err;
2171
2172        cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2173                   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2174                   buffer->bytecount;
2175        olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
2176
2177        desc = IGC_TX_DESC(ring, ring->next_to_use);
2178        desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2179        desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2180        desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
2181
2182        netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount);
2183
2184        buffer->next_to_watch = desc;
2185
2186        ring->next_to_use++;
2187        if (ring->next_to_use == ring->count)
2188                ring->next_to_use = 0;
2189
2190        return 0;
2191}
2192
2193static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
2194                                            int cpu)
2195{
2196        int index = cpu;
2197
2198        if (unlikely(index < 0))
2199                index = 0;
2200
2201        while (index >= adapter->num_tx_queues)
2202                index -= adapter->num_tx_queues;
2203
2204        return adapter->tx_ring[index];
2205}
2206
2207static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
2208{
2209        struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
2210        int cpu = smp_processor_id();
2211        struct netdev_queue *nq;
2212        struct igc_ring *ring;
2213        int res;
2214
2215        if (unlikely(!xdpf))
2216                return -EFAULT;
2217
2218        ring = igc_xdp_get_tx_ring(adapter, cpu);
2219        nq = txring_txq(ring);
2220
2221        __netif_tx_lock(nq, cpu);
2222        res = igc_xdp_init_tx_descriptor(ring, xdpf);
2223        __netif_tx_unlock(nq);
2224        return res;
2225}
2226
2227/* This function assumes rcu_read_lock() is held by the caller. */
2228static int __igc_xdp_run_prog(struct igc_adapter *adapter,
2229                              struct bpf_prog *prog,
2230                              struct xdp_buff *xdp)
2231{
2232        u32 act = bpf_prog_run_xdp(prog, xdp);
2233
2234        switch (act) {
2235        case XDP_PASS:
2236                return IGC_XDP_PASS;
2237        case XDP_TX:
2238                if (igc_xdp_xmit_back(adapter, xdp) < 0)
2239                        goto out_failure;
2240                return IGC_XDP_TX;
2241        case XDP_REDIRECT:
2242                if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
2243                        goto out_failure;
2244                return IGC_XDP_REDIRECT;
2245                break;
2246        default:
2247                bpf_warn_invalid_xdp_action(adapter->netdev, prog, act);
2248                fallthrough;
2249        case XDP_ABORTED:
2250out_failure:
2251                trace_xdp_exception(adapter->netdev, prog, act);
2252                fallthrough;
2253        case XDP_DROP:
2254                return IGC_XDP_CONSUMED;
2255        }
2256}
2257
2258static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
2259                                        struct xdp_buff *xdp)
2260{
2261        struct bpf_prog *prog;
2262        int res;
2263
2264        prog = READ_ONCE(adapter->xdp_prog);
2265        if (!prog) {
2266                res = IGC_XDP_PASS;
2267                goto out;
2268        }
2269
2270        res = __igc_xdp_run_prog(adapter, prog, xdp);
2271
2272out:
2273        return ERR_PTR(-res);
2274}
2275
2276/* This function assumes __netif_tx_lock is held by the caller. */
2277static void igc_flush_tx_descriptors(struct igc_ring *ring)
2278{
2279        /* Once tail pointer is updated, hardware can fetch the descriptors
2280         * any time so we issue a write membar here to ensure all memory
2281         * writes are complete before the tail pointer is updated.
2282         */
2283        wmb();
2284        writel(ring->next_to_use, ring->tail);
2285}
2286
2287static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
2288{
2289        int cpu = smp_processor_id();
2290        struct netdev_queue *nq;
2291        struct igc_ring *ring;
2292
2293        if (status & IGC_XDP_TX) {
2294                ring = igc_xdp_get_tx_ring(adapter, cpu);
2295                nq = txring_txq(ring);
2296
2297                __netif_tx_lock(nq, cpu);
2298                igc_flush_tx_descriptors(ring);
2299                __netif_tx_unlock(nq);
2300        }
2301
2302        if (status & IGC_XDP_REDIRECT)
2303                xdp_do_flush();
2304}
2305
2306static void igc_update_rx_stats(struct igc_q_vector *q_vector,
2307                                unsigned int packets, unsigned int bytes)
2308{
2309        struct igc_ring *ring = q_vector->rx.ring;
2310
2311        u64_stats_update_begin(&ring->rx_syncp);
2312        ring->rx_stats.packets += packets;
2313        ring->rx_stats.bytes += bytes;
2314        u64_stats_update_end(&ring->rx_syncp);
2315
2316        q_vector->rx.total_packets += packets;
2317        q_vector->rx.total_bytes += bytes;
2318}
2319
2320static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
2321{
2322        unsigned int total_bytes = 0, total_packets = 0;
2323        struct igc_adapter *adapter = q_vector->adapter;
2324        struct igc_ring *rx_ring = q_vector->rx.ring;
2325        struct sk_buff *skb = rx_ring->skb;
2326        u16 cleaned_count = igc_desc_unused(rx_ring);
2327        int xdp_status = 0, rx_buffer_pgcnt;
2328
2329        while (likely(total_packets < budget)) {
2330                union igc_adv_rx_desc *rx_desc;
2331                struct igc_rx_buffer *rx_buffer;
2332                unsigned int size, truesize;
2333                ktime_t timestamp = 0;
2334                struct xdp_buff xdp;
2335                int pkt_offset = 0;
2336                void *pktbuf;
2337
2338                /* return some buffers to hardware, one at a time is too slow */
2339                if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
2340                        igc_alloc_rx_buffers(rx_ring, cleaned_count);
2341                        cleaned_count = 0;
2342                }
2343
2344                rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
2345                size = le16_to_cpu(rx_desc->wb.upper.length);
2346                if (!size)
2347                        break;
2348
2349                /* This memory barrier is needed to keep us from reading
2350                 * any other fields out of the rx_desc until we know the
2351                 * descriptor has been written back
2352                 */
2353                dma_rmb();
2354
2355                rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
2356                truesize = igc_get_rx_frame_truesize(rx_ring, size);
2357
2358                pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
2359
2360                if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
2361                        timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2362                                                        pktbuf);
2363                        pkt_offset = IGC_TS_HDR_LEN;
2364                        size -= IGC_TS_HDR_LEN;
2365                }
2366
2367                if (!skb) {
2368                        xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
2369                        xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
2370                                         igc_rx_offset(rx_ring) + pkt_offset,
2371                                         size, true);
2372
2373                        skb = igc_xdp_run_prog(adapter, &xdp);
2374                }
2375
2376                if (IS_ERR(skb)) {
2377                        unsigned int xdp_res = -PTR_ERR(skb);
2378
2379                        switch (xdp_res) {
2380                        case IGC_XDP_CONSUMED:
2381                                rx_buffer->pagecnt_bias++;
2382                                break;
2383                        case IGC_XDP_TX:
2384                        case IGC_XDP_REDIRECT:
2385                                igc_rx_buffer_flip(rx_buffer, truesize);
2386                                xdp_status |= xdp_res;
2387                                break;
2388                        }
2389
2390                        total_packets++;
2391                        total_bytes += size;
2392                } else if (skb)
2393                        igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
2394                else if (ring_uses_build_skb(rx_ring))
2395                        skb = igc_build_skb(rx_ring, rx_buffer, &xdp);
2396                else
2397                        skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
2398                                                timestamp);
2399
2400                /* exit if we failed to retrieve a buffer */
2401                if (!skb) {
2402                        rx_ring->rx_stats.alloc_failed++;
2403                        rx_buffer->pagecnt_bias++;
2404                        break;
2405                }
2406
2407                igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
2408                cleaned_count++;
2409
2410                /* fetch next buffer in frame if non-eop */
2411                if (igc_is_non_eop(rx_ring, rx_desc))
2412                        continue;
2413
2414                /* verify the packet layout is correct */
2415                if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
2416                        skb = NULL;
2417                        continue;
2418                }
2419
2420                /* probably a little skewed due to removing CRC */
2421                total_bytes += skb->len;
2422
2423                /* populate checksum, VLAN, and protocol */
2424                igc_process_skb_fields(rx_ring, rx_desc, skb);
2425
2426                napi_gro_receive(&q_vector->napi, skb);
2427
2428                /* reset skb pointer */
2429                skb = NULL;
2430
2431                /* update budget accounting */
2432                total_packets++;
2433        }
2434
2435        if (xdp_status)
2436                igc_finalize_xdp(adapter, xdp_status);
2437
2438        /* place incomplete frames back on ring for completion */
2439        rx_ring->skb = skb;
2440
2441        igc_update_rx_stats(q_vector, total_packets, total_bytes);
2442
2443        if (cleaned_count)
2444                igc_alloc_rx_buffers(rx_ring, cleaned_count);
2445
2446        return total_packets;
2447}
2448
2449static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
2450                                            struct xdp_buff *xdp)
2451{
2452        unsigned int totalsize = xdp->data_end - xdp->data_meta;
2453        unsigned int metasize = xdp->data - xdp->data_meta;
2454        struct sk_buff *skb;
2455
2456        net_prefetch(xdp->data_meta);
2457
2458        skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize,
2459                               GFP_ATOMIC | __GFP_NOWARN);
2460        if (unlikely(!skb))
2461                return NULL;
2462
2463        memcpy(__skb_put(skb, totalsize), xdp->data_meta,
2464               ALIGN(totalsize, sizeof(long)));
2465
2466        if (metasize) {
2467                skb_metadata_set(skb, metasize);
2468                __skb_pull(skb, metasize);
2469        }
2470
2471        return skb;
2472}
2473
2474static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
2475                                union igc_adv_rx_desc *desc,
2476                                struct xdp_buff *xdp,
2477                                ktime_t timestamp)
2478{
2479        struct igc_ring *ring = q_vector->rx.ring;
2480        struct sk_buff *skb;
2481
2482        skb = igc_construct_skb_zc(ring, xdp);
2483        if (!skb) {
2484                ring->rx_stats.alloc_failed++;
2485                return;
2486        }
2487
2488        if (timestamp)
2489                skb_hwtstamps(skb)->hwtstamp = timestamp;
2490
2491        if (igc_cleanup_headers(ring, desc, skb))
2492                return;
2493
2494        igc_process_skb_fields(ring, desc, skb);
2495        napi_gro_receive(&q_vector->napi, skb);
2496}
2497
2498static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
2499{
2500        struct igc_adapter *adapter = q_vector->adapter;
2501        struct igc_ring *ring = q_vector->rx.ring;
2502        u16 cleaned_count = igc_desc_unused(ring);
2503        int total_bytes = 0, total_packets = 0;
2504        u16 ntc = ring->next_to_clean;
2505        struct bpf_prog *prog;
2506        bool failure = false;
2507        int xdp_status = 0;
2508
2509        rcu_read_lock();
2510
2511        prog = READ_ONCE(adapter->xdp_prog);
2512
2513        while (likely(total_packets < budget)) {
2514                union igc_adv_rx_desc *desc;
2515                struct igc_rx_buffer *bi;
2516                ktime_t timestamp = 0;
2517                unsigned int size;
2518                int res;
2519
2520                desc = IGC_RX_DESC(ring, ntc);
2521                size = le16_to_cpu(desc->wb.upper.length);
2522                if (!size)
2523                        break;
2524
2525                /* This memory barrier is needed to keep us from reading
2526                 * any other fields out of the rx_desc until we know the
2527                 * descriptor has been written back
2528                 */
2529                dma_rmb();
2530
2531                bi = &ring->rx_buffer_info[ntc];
2532
2533                if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
2534                        timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2535                                                        bi->xdp->data);
2536
2537                        bi->xdp->data += IGC_TS_HDR_LEN;
2538
2539                        /* HW timestamp has been copied into local variable. Metadata
2540                         * length when XDP program is called should be 0.
2541                         */
2542                        bi->xdp->data_meta += IGC_TS_HDR_LEN;
2543                        size -= IGC_TS_HDR_LEN;
2544                }
2545
2546                bi->xdp->data_end = bi->xdp->data + size;
2547                xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
2548
2549                res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
2550                switch (res) {
2551                case IGC_XDP_PASS:
2552                        igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
2553                        fallthrough;
2554                case IGC_XDP_CONSUMED:
2555                        xsk_buff_free(bi->xdp);
2556                        break;
2557                case IGC_XDP_TX:
2558                case IGC_XDP_REDIRECT:
2559                        xdp_status |= res;
2560                        break;
2561                }
2562
2563                bi->xdp = NULL;
2564                total_bytes += size;
2565                total_packets++;
2566                cleaned_count++;
2567                ntc++;
2568                if (ntc == ring->count)
2569                        ntc = 0;
2570        }
2571
2572        ring->next_to_clean = ntc;
2573        rcu_read_unlock();
2574
2575        if (cleaned_count >= IGC_RX_BUFFER_WRITE)
2576                failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count);
2577
2578        if (xdp_status)
2579                igc_finalize_xdp(adapter, xdp_status);
2580
2581        igc_update_rx_stats(q_vector, total_packets, total_bytes);
2582
2583        if (xsk_uses_need_wakeup(ring->xsk_pool)) {
2584                if (failure || ring->next_to_clean == ring->next_to_use)
2585                        xsk_set_rx_need_wakeup(ring->xsk_pool);
2586                else
2587                        xsk_clear_rx_need_wakeup(ring->xsk_pool);
2588                return total_packets;
2589        }
2590
2591        return failure ? budget : total_packets;
2592}
2593
2594static void igc_update_tx_stats(struct igc_q_vector *q_vector,
2595                                unsigned int packets, unsigned int bytes)
2596{
2597        struct igc_ring *ring = q_vector->tx.ring;
2598
2599        u64_stats_update_begin(&ring->tx_syncp);
2600        ring->tx_stats.bytes += bytes;
2601        ring->tx_stats.packets += packets;
2602        u64_stats_update_end(&ring->tx_syncp);
2603
2604        q_vector->tx.total_bytes += bytes;
2605        q_vector->tx.total_packets += packets;
2606}
2607
2608static void igc_xdp_xmit_zc(struct igc_ring *ring)
2609{
2610        struct xsk_buff_pool *pool = ring->xsk_pool;
2611        struct netdev_queue *nq = txring_txq(ring);
2612        union igc_adv_tx_desc *tx_desc = NULL;
2613        int cpu = smp_processor_id();
2614        u16 ntu = ring->next_to_use;
2615        struct xdp_desc xdp_desc;
2616        u16 budget;
2617
2618        if (!netif_carrier_ok(ring->netdev))
2619                return;
2620
2621        __netif_tx_lock(nq, cpu);
2622
2623        budget = igc_desc_unused(ring);
2624
2625        while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
2626                u32 cmd_type, olinfo_status;
2627                struct igc_tx_buffer *bi;
2628                dma_addr_t dma;
2629
2630                cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2631                           IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2632                           xdp_desc.len;
2633                olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT;
2634
2635                dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
2636                xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len);
2637
2638                tx_desc = IGC_TX_DESC(ring, ntu);
2639                tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2640                tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2641                tx_desc->read.buffer_addr = cpu_to_le64(dma);
2642
2643                bi = &ring->tx_buffer_info[ntu];
2644                bi->type = IGC_TX_BUFFER_TYPE_XSK;
2645                bi->protocol = 0;
2646                bi->bytecount = xdp_desc.len;
2647                bi->gso_segs = 1;
2648                bi->time_stamp = jiffies;
2649                bi->next_to_watch = tx_desc;
2650
2651                netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len);
2652
2653                ntu++;
2654                if (ntu == ring->count)
2655                        ntu = 0;
2656        }
2657
2658        ring->next_to_use = ntu;
2659        if (tx_desc) {
2660                igc_flush_tx_descriptors(ring);
2661                xsk_tx_release(pool);
2662        }
2663
2664        __netif_tx_unlock(nq);
2665}
2666
2667/**
2668 * igc_clean_tx_irq - Reclaim resources after transmit completes
2669 * @q_vector: pointer to q_vector containing needed info
2670 * @napi_budget: Used to determine if we are in netpoll
2671 *
2672 * returns true if ring is completely cleaned
2673 */
2674static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
2675{
2676        struct igc_adapter *adapter = q_vector->adapter;
2677        unsigned int total_bytes = 0, total_packets = 0;
2678        unsigned int budget = q_vector->tx.work_limit;
2679        struct igc_ring *tx_ring = q_vector->tx.ring;
2680        unsigned int i = tx_ring->next_to_clean;
2681        struct igc_tx_buffer *tx_buffer;
2682        union igc_adv_tx_desc *tx_desc;
2683        u32 xsk_frames = 0;
2684
2685        if (test_bit(__IGC_DOWN, &adapter->state))
2686                return true;
2687
2688        tx_buffer = &tx_ring->tx_buffer_info[i];
2689        tx_desc = IGC_TX_DESC(tx_ring, i);
2690        i -= tx_ring->count;
2691
2692        do {
2693                union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
2694
2695                /* if next_to_watch is not set then there is no work pending */
2696                if (!eop_desc)
2697                        break;
2698
2699                /* prevent any other reads prior to eop_desc */
2700                smp_rmb();
2701
2702                /* if DD is not set pending work has not been completed */
2703                if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
2704                        break;
2705
2706                /* clear next_to_watch to prevent false hangs */
2707                tx_buffer->next_to_watch = NULL;
2708
2709                /* update the statistics for this packet */
2710                total_bytes += tx_buffer->bytecount;
2711                total_packets += tx_buffer->gso_segs;
2712
2713                switch (tx_buffer->type) {
2714                case IGC_TX_BUFFER_TYPE_XSK:
2715                        xsk_frames++;
2716                        break;
2717                case IGC_TX_BUFFER_TYPE_XDP:
2718                        xdp_return_frame(tx_buffer->xdpf);
2719                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2720                        break;
2721                case IGC_TX_BUFFER_TYPE_SKB:
2722                        napi_consume_skb(tx_buffer->skb, napi_budget);
2723                        igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2724                        break;
2725                default:
2726                        netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
2727                        break;
2728                }
2729
2730                /* clear last DMA location and unmap remaining buffers */
2731                while (tx_desc != eop_desc) {
2732                        tx_buffer++;
2733                        tx_desc++;
2734                        i++;
2735                        if (unlikely(!i)) {
2736                                i -= tx_ring->count;
2737                                tx_buffer = tx_ring->tx_buffer_info;
2738                                tx_desc = IGC_TX_DESC(tx_ring, 0);
2739                        }
2740
2741                        /* unmap any remaining paged data */
2742                        if (dma_unmap_len(tx_buffer, len))
2743                                igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2744                }
2745
2746                /* move us one more past the eop_desc for start of next pkt */
2747                tx_buffer++;
2748                tx_desc++;
2749                i++;
2750                if (unlikely(!i)) {
2751                        i -= tx_ring->count;
2752                        tx_buffer = tx_ring->tx_buffer_info;
2753                        tx_desc = IGC_TX_DESC(tx_ring, 0);
2754                }
2755
2756                /* issue prefetch for next Tx descriptor */
2757                prefetch(tx_desc);
2758
2759                /* update budget accounting */
2760                budget--;
2761        } while (likely(budget));
2762
2763        netdev_tx_completed_queue(txring_txq(tx_ring),
2764                                  total_packets, total_bytes);
2765
2766        i += tx_ring->count;
2767        tx_ring->next_to_clean = i;
2768
2769        igc_update_tx_stats(q_vector, total_packets, total_bytes);
2770
2771        if (tx_ring->xsk_pool) {
2772                if (xsk_frames)
2773                        xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
2774                if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
2775                        xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
2776                igc_xdp_xmit_zc(tx_ring);
2777        }
2778
2779        if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
2780                struct igc_hw *hw = &adapter->hw;
2781
2782                /* Detect a transmit hang in hardware, this serializes the
2783                 * check with the clearing of time_stamp and movement of i
2784                 */
2785                clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
2786                if (tx_buffer->next_to_watch &&
2787                    time_after(jiffies, tx_buffer->time_stamp +
2788                    (adapter->tx_timeout_factor * HZ)) &&
2789                    !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
2790                        /* detected Tx unit hang */
2791                        netdev_err(tx_ring->netdev,
2792                                   "Detected Tx Unit Hang\n"
2793                                   "  Tx Queue             <%d>\n"
2794                                   "  TDH                  <%x>\n"
2795                                   "  TDT                  <%x>\n"
2796                                   "  next_to_use          <%x>\n"
2797                                   "  next_to_clean        <%x>\n"
2798                                   "buffer_info[next_to_clean]\n"
2799                                   "  time_stamp           <%lx>\n"
2800                                   "  next_to_watch        <%p>\n"
2801                                   "  jiffies              <%lx>\n"
2802                                   "  desc.status          <%x>\n",
2803                                   tx_ring->queue_index,
2804                                   rd32(IGC_TDH(tx_ring->reg_idx)),
2805                                   readl(tx_ring->tail),
2806                                   tx_ring->next_to_use,
2807                                   tx_ring->next_to_clean,
2808                                   tx_buffer->time_stamp,
2809                                   tx_buffer->next_to_watch,
2810                                   jiffies,
2811                                   tx_buffer->next_to_watch->wb.status);
2812                        netif_stop_subqueue(tx_ring->netdev,
2813                                            tx_ring->queue_index);
2814
2815                        /* we are about to reset, no point in enabling stuff */
2816                        return true;
2817                }
2818        }
2819
2820#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
2821        if (unlikely(total_packets &&
2822                     netif_carrier_ok(tx_ring->netdev) &&
2823                     igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
2824                /* Make sure that anybody stopping the queue after this
2825                 * sees the new next_to_clean.
2826                 */
2827                smp_mb();
2828                if (__netif_subqueue_stopped(tx_ring->netdev,
2829                                             tx_ring->queue_index) &&
2830                    !(test_bit(__IGC_DOWN, &adapter->state))) {
2831                        netif_wake_subqueue(tx_ring->netdev,
2832                                            tx_ring->queue_index);
2833
2834                        u64_stats_update_begin(&tx_ring->tx_syncp);
2835                        tx_ring->tx_stats.restart_queue++;
2836                        u64_stats_update_end(&tx_ring->tx_syncp);
2837                }
2838        }
2839
2840        return !!budget;
2841}
2842
2843static int igc_find_mac_filter(struct igc_adapter *adapter,
2844                               enum igc_mac_filter_type type, const u8 *addr)
2845{
2846        struct igc_hw *hw = &adapter->hw;
2847        int max_entries = hw->mac.rar_entry_count;
2848        u32 ral, rah;
2849        int i;
2850
2851        for (i = 0; i < max_entries; i++) {
2852                ral = rd32(IGC_RAL(i));
2853                rah = rd32(IGC_RAH(i));
2854
2855                if (!(rah & IGC_RAH_AV))
2856                        continue;
2857                if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type)
2858                        continue;
2859                if ((rah & IGC_RAH_RAH_MASK) !=
2860                    le16_to_cpup((__le16 *)(addr + 4)))
2861                        continue;
2862                if (ral != le32_to_cpup((__le32 *)(addr)))
2863                        continue;
2864
2865                return i;
2866        }
2867
2868        return -1;
2869}
2870
2871static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
2872{
2873        struct igc_hw *hw = &adapter->hw;
2874        int max_entries = hw->mac.rar_entry_count;
2875        u32 rah;
2876        int i;
2877
2878        for (i = 0; i < max_entries; i++) {
2879                rah = rd32(IGC_RAH(i));
2880
2881                if (!(rah & IGC_RAH_AV))
2882                        return i;
2883        }
2884
2885        return -1;
2886}
2887
2888/**
2889 * igc_add_mac_filter() - Add MAC address filter
2890 * @adapter: Pointer to adapter where the filter should be added
2891 * @type: MAC address filter type (source or destination)
2892 * @addr: MAC address
2893 * @queue: If non-negative, queue assignment feature is enabled and frames
2894 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
2895 *         assignment is disabled.
2896 *
2897 * Return: 0 in case of success, negative errno code otherwise.
2898 */
2899static int igc_add_mac_filter(struct igc_adapter *adapter,
2900                              enum igc_mac_filter_type type, const u8 *addr,
2901                              int queue)
2902{
2903        struct net_device *dev = adapter->netdev;
2904        int index;
2905
2906        index = igc_find_mac_filter(adapter, type, addr);
2907        if (index >= 0)
2908                goto update_filter;
2909
2910        index = igc_get_avail_mac_filter_slot(adapter);
2911        if (index < 0)
2912                return -ENOSPC;
2913
2914        netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n",
2915                   index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
2916                   addr, queue);
2917
2918update_filter:
2919        igc_set_mac_filter_hw(adapter, index, type, addr, queue);
2920        return 0;
2921}
2922
2923/**
2924 * igc_del_mac_filter() - Delete MAC address filter
2925 * @adapter: Pointer to adapter where the filter should be deleted from
2926 * @type: MAC address filter type (source or destination)
2927 * @addr: MAC address
2928 */
2929static void igc_del_mac_filter(struct igc_adapter *adapter,
2930                               enum igc_mac_filter_type type, const u8 *addr)
2931{
2932        struct net_device *dev = adapter->netdev;
2933        int index;
2934
2935        index = igc_find_mac_filter(adapter, type, addr);
2936        if (index < 0)
2937                return;
2938
2939        if (index == 0) {
2940                /* If this is the default filter, we don't actually delete it.
2941                 * We just reset to its default value i.e. disable queue
2942                 * assignment.
2943                 */
2944                netdev_dbg(dev, "Disable default MAC filter queue assignment");
2945
2946                igc_set_mac_filter_hw(adapter, 0, type, addr, -1);
2947        } else {
2948                netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n",
2949                           index,
2950                           type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
2951                           addr);
2952
2953                igc_clear_mac_filter_hw(adapter, index);
2954        }
2955}
2956
2957/**
2958 * igc_add_vlan_prio_filter() - Add VLAN priority filter
2959 * @adapter: Pointer to adapter where the filter should be added
2960 * @prio: VLAN priority value
2961 * @queue: Queue number which matching frames are assigned to
2962 *
2963 * Return: 0 in case of success, negative errno code otherwise.
2964 */
2965static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
2966                                    int queue)
2967{
2968        struct net_device *dev = adapter->netdev;
2969        struct igc_hw *hw = &adapter->hw;
2970        u32 vlanpqf;
2971
2972        vlanpqf = rd32(IGC_VLANPQF);
2973
2974        if (vlanpqf & IGC_VLANPQF_VALID(prio)) {
2975                netdev_dbg(dev, "VLAN priority filter already in use\n");
2976                return -EEXIST;
2977        }
2978
2979        vlanpqf |= IGC_VLANPQF_QSEL(prio, queue);
2980        vlanpqf |= IGC_VLANPQF_VALID(prio);
2981
2982        wr32(IGC_VLANPQF, vlanpqf);
2983
2984        netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n",
2985                   prio, queue);
2986        return 0;
2987}
2988
2989/**
2990 * igc_del_vlan_prio_filter() - Delete VLAN priority filter
2991 * @adapter: Pointer to adapter where the filter should be deleted from
2992 * @prio: VLAN priority value
2993 */
2994static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
2995{
2996        struct igc_hw *hw = &adapter->hw;
2997        u32 vlanpqf;
2998
2999        vlanpqf = rd32(IGC_VLANPQF);
3000
3001        vlanpqf &= ~IGC_VLANPQF_VALID(prio);
3002        vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK);
3003
3004        wr32(IGC_VLANPQF, vlanpqf);
3005
3006        netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n",
3007                   prio);
3008}
3009
3010static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter)
3011{
3012        struct igc_hw *hw = &adapter->hw;
3013        int i;
3014
3015        for (i = 0; i < MAX_ETYPE_FILTER; i++) {
3016                u32 etqf = rd32(IGC_ETQF(i));
3017
3018                if (!(etqf & IGC_ETQF_FILTER_ENABLE))
3019                        return i;
3020        }
3021
3022        return -1;
3023}
3024
3025/**
3026 * igc_add_etype_filter() - Add ethertype filter
3027 * @adapter: Pointer to adapter where the filter should be added
3028 * @etype: Ethertype value
3029 * @queue: If non-negative, queue assignment feature is enabled and frames
3030 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
3031 *         assignment is disabled.
3032 *
3033 * Return: 0 in case of success, negative errno code otherwise.
3034 */
3035static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype,
3036                                int queue)
3037{
3038        struct igc_hw *hw = &adapter->hw;
3039        int index;
3040        u32 etqf;
3041
3042        index = igc_get_avail_etype_filter_slot(adapter);
3043        if (index < 0)
3044                return -ENOSPC;
3045
3046        etqf = rd32(IGC_ETQF(index));
3047
3048        etqf &= ~IGC_ETQF_ETYPE_MASK;
3049        etqf |= etype;
3050
3051        if (queue >= 0) {
3052                etqf &= ~IGC_ETQF_QUEUE_MASK;
3053                etqf |= (queue << IGC_ETQF_QUEUE_SHIFT);
3054                etqf |= IGC_ETQF_QUEUE_ENABLE;
3055        }
3056
3057        etqf |= IGC_ETQF_FILTER_ENABLE;
3058
3059        wr32(IGC_ETQF(index), etqf);
3060
3061        netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n",
3062                   etype, queue);
3063        return 0;
3064}
3065
3066static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
3067{
3068        struct igc_hw *hw = &adapter->hw;
3069        int i;
3070
3071        for (i = 0; i < MAX_ETYPE_FILTER; i++) {
3072                u32 etqf = rd32(IGC_ETQF(i));
3073
3074                if ((etqf & IGC_ETQF_ETYPE_MASK) == etype)
3075                        return i;
3076        }
3077
3078        return -1;
3079}
3080
3081/**
3082 * igc_del_etype_filter() - Delete ethertype filter
3083 * @adapter: Pointer to adapter where the filter should be deleted from
3084 * @etype: Ethertype value
3085 */
3086static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
3087{
3088        struct igc_hw *hw = &adapter->hw;
3089        int index;
3090
3091        index = igc_find_etype_filter(adapter, etype);
3092        if (index < 0)
3093                return;
3094
3095        wr32(IGC_ETQF(index), 0);
3096
3097        netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n",
3098                   etype);
3099}
3100
3101static int igc_flex_filter_select(struct igc_adapter *adapter,
3102                                  struct igc_flex_filter *input,
3103                                  u32 *fhft)
3104{
3105        struct igc_hw *hw = &adapter->hw;
3106        u8 fhft_index;
3107        u32 fhftsl;
3108
3109        if (input->index >= MAX_FLEX_FILTER) {
3110                dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n");
3111                return -EINVAL;
3112        }
3113
3114        /* Indirect table select register */
3115        fhftsl = rd32(IGC_FHFTSL);
3116        fhftsl &= ~IGC_FHFTSL_FTSL_MASK;
3117        switch (input->index) {
3118        case 0 ... 7:
3119                fhftsl |= 0x00;
3120                break;
3121        case 8 ... 15:
3122                fhftsl |= 0x01;
3123                break;
3124        case 16 ... 23:
3125                fhftsl |= 0x02;
3126                break;
3127        case 24 ... 31:
3128                fhftsl |= 0x03;
3129                break;
3130        }
3131        wr32(IGC_FHFTSL, fhftsl);
3132
3133        /* Normalize index down to host table register */
3134        fhft_index = input->index % 8;
3135
3136        *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) :
3137                IGC_FHFT_EXT(fhft_index - 4);
3138
3139        return 0;
3140}
3141
3142static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
3143                                    struct igc_flex_filter *input)
3144{
3145        struct device *dev = &adapter->pdev->dev;
3146        struct igc_hw *hw = &adapter->hw;
3147        u8 *data = input->data;
3148        u8 *mask = input->mask;
3149        u32 queuing;
3150        u32 fhft;
3151        u32 wufc;
3152        int ret;
3153        int i;
3154
3155        /* Length has to be aligned to 8. Otherwise the filter will fail. Bail
3156         * out early to avoid surprises later.
3157         */
3158        if (input->length % 8 != 0) {
3159                dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n");
3160                return -EINVAL;
3161        }
3162
3163        /* Select corresponding flex filter register and get base for host table. */
3164        ret = igc_flex_filter_select(adapter, input, &fhft);
3165        if (ret)
3166                return ret;
3167
3168        /* When adding a filter globally disable flex filter feature. That is
3169         * recommended within the datasheet.
3170         */
3171        wufc = rd32(IGC_WUFC);
3172        wufc &= ~IGC_WUFC_FLEX_HQ;
3173        wr32(IGC_WUFC, wufc);
3174
3175        /* Configure filter */
3176        queuing = input->length & IGC_FHFT_LENGTH_MASK;
3177        queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK;
3178        queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK;
3179
3180        if (input->immediate_irq)
3181                queuing |= IGC_FHFT_IMM_INT;
3182
3183        if (input->drop)
3184                queuing |= IGC_FHFT_DROP;
3185
3186        wr32(fhft + 0xFC, queuing);
3187
3188        /* Write data (128 byte) and mask (128 bit) */
3189        for (i = 0; i < 16; ++i) {
3190                const size_t data_idx = i * 8;
3191                const size_t row_idx = i * 16;
3192                u32 dw0 =
3193                        (data[data_idx + 0] << 0) |
3194                        (data[data_idx + 1] << 8) |
3195                        (data[data_idx + 2] << 16) |
3196                        (data[data_idx + 3] << 24);
3197                u32 dw1 =
3198                        (data[data_idx + 4] << 0) |
3199                        (data[data_idx + 5] << 8) |
3200                        (data[data_idx + 6] << 16) |
3201                        (data[data_idx + 7] << 24);
3202                u32 tmp;
3203
3204                /* Write row: dw0, dw1 and mask */
3205                wr32(fhft + row_idx, dw0);
3206                wr32(fhft + row_idx + 4, dw1);
3207
3208                /* mask is only valid for MASK(7, 0) */
3209                tmp = rd32(fhft + row_idx + 8);
3210                tmp &= ~GENMASK(7, 0);
3211                tmp |= mask[i];
3212                wr32(fhft + row_idx + 8, tmp);
3213        }
3214
3215        /* Enable filter. */
3216        wufc |= IGC_WUFC_FLEX_HQ;
3217        if (input->index > 8) {
3218                /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. */
3219                u32 wufc_ext = rd32(IGC_WUFC_EXT);
3220
3221                wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8));
3222
3223                wr32(IGC_WUFC_EXT, wufc_ext);
3224        } else {
3225                wufc |= (IGC_WUFC_FLX0 << input->index);
3226        }
3227        wr32(IGC_WUFC, wufc);
3228
3229        dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n",
3230                input->index);
3231
3232        return 0;
3233}
3234
3235static void igc_flex_filter_add_field(struct igc_flex_filter *flex,
3236                                      const void *src, unsigned int offset,
3237                                      size_t len, const void *mask)
3238{
3239        int i;
3240
3241        /* data */
3242        memcpy(&flex->data[offset], src, len);
3243
3244        /* mask */
3245        for (i = 0; i < len; ++i) {
3246                const unsigned int idx = i + offset;
3247                const u8 *ptr = mask;
3248
3249                if (mask) {
3250                        if (ptr[i] & 0xff)
3251                                flex->mask[idx / 8] |= BIT(idx % 8);
3252
3253                        continue;
3254                }
3255
3256                flex->mask[idx / 8] |= BIT(idx % 8);
3257        }
3258}
3259
3260static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter)
3261{
3262        struct igc_hw *hw = &adapter->hw;
3263        u32 wufc, wufc_ext;
3264        int i;
3265
3266        wufc = rd32(IGC_WUFC);
3267        wufc_ext = rd32(IGC_WUFC_EXT);
3268
3269        for (i = 0; i < MAX_FLEX_FILTER; i++) {
3270                if (i < 8) {
3271                        if (!(wufc & (IGC_WUFC_FLX0 << i)))
3272                                return i;
3273                } else {
3274                        if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8))))
3275                                return i;
3276                }
3277        }
3278
3279        return -ENOSPC;
3280}
3281
3282static bool igc_flex_filter_in_use(struct igc_adapter *adapter)
3283{
3284        struct igc_hw *hw = &adapter->hw;
3285        u32 wufc, wufc_ext;
3286
3287        wufc = rd32(IGC_WUFC);
3288        wufc_ext = rd32(IGC_WUFC_EXT);
3289
3290        if (wufc & IGC_WUFC_FILTER_MASK)
3291                return true;
3292
3293        if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK)
3294                return true;
3295
3296        return false;
3297}
3298
3299static int igc_add_flex_filter(struct igc_adapter *adapter,
3300                               struct igc_nfc_rule *rule)
3301{
3302        struct igc_flex_filter flex = { };
3303        struct igc_nfc_filter *filter = &rule->filter;
3304        unsigned int eth_offset, user_offset;
3305        int ret, index;
3306        bool vlan;
3307
3308        index = igc_find_avail_flex_filter_slot(adapter);
3309        if (index < 0)
3310                return -ENOSPC;
3311
3312        /* Construct the flex filter:
3313         *  -> dest_mac [6]
3314         *  -> src_mac [6]
3315         *  -> tpid [2]
3316         *  -> vlan tci [2]
3317         *  -> ether type [2]
3318         *  -> user data [8]
3319         *  -> = 26 bytes => 32 length
3320         */
3321        flex.index    = index;
3322        flex.length   = 32;
3323        flex.rx_queue = rule->action;
3324
3325        vlan = rule->filter.vlan_tci || rule->filter.vlan_etype;
3326        eth_offset = vlan ? 16 : 12;
3327        user_offset = vlan ? 18 : 14;
3328
3329        /* Add destination MAC  */
3330        if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3331                igc_flex_filter_add_field(&flex, &filter->dst_addr, 0,
3332                                          ETH_ALEN, NULL);
3333
3334        /* Add source MAC */
3335        if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3336                igc_flex_filter_add_field(&flex, &filter->src_addr, 6,
3337                                          ETH_ALEN, NULL);
3338
3339        /* Add VLAN etype */
3340        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE)
3341                igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12,
3342                                          sizeof(filter->vlan_etype),
3343                                          NULL);
3344
3345        /* Add VLAN TCI */
3346        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
3347                igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14,
3348                                          sizeof(filter->vlan_tci), NULL);
3349
3350        /* Add Ether type */
3351        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3352                __be16 etype = cpu_to_be16(filter->etype);
3353
3354                igc_flex_filter_add_field(&flex, &etype, eth_offset,
3355                                          sizeof(etype), NULL);
3356        }
3357
3358        /* Add user data */
3359        if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA)
3360                igc_flex_filter_add_field(&flex, &filter->user_data,
3361                                          user_offset,
3362                                          sizeof(filter->user_data),
3363                                          filter->user_mask);
3364
3365        /* Add it down to the hardware and enable it. */
3366        ret = igc_write_flex_filter_ll(adapter, &flex);
3367        if (ret)
3368                return ret;
3369
3370        filter->flex_index = index;
3371
3372        return 0;
3373}
3374
3375static void igc_del_flex_filter(struct igc_adapter *adapter,
3376                                u16 reg_index)
3377{
3378        struct igc_hw *hw = &adapter->hw;
3379        u32 wufc;
3380
3381        /* Just disable the filter. The filter table itself is kept
3382         * intact. Another flex_filter_add() should override the "old" data
3383         * then.
3384         */
3385        if (reg_index > 8) {
3386                u32 wufc_ext = rd32(IGC_WUFC_EXT);
3387
3388                wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8));
3389                wr32(IGC_WUFC_EXT, wufc_ext);
3390        } else {
3391                wufc = rd32(IGC_WUFC);
3392
3393                wufc &= ~(IGC_WUFC_FLX0 << reg_index);
3394                wr32(IGC_WUFC, wufc);
3395        }
3396
3397        if (igc_flex_filter_in_use(adapter))
3398                return;
3399
3400        /* No filters are in use, we may disable flex filters */
3401        wufc = rd32(IGC_WUFC);
3402        wufc &= ~IGC_WUFC_FLEX_HQ;
3403        wr32(IGC_WUFC, wufc);
3404}
3405
3406static int igc_enable_nfc_rule(struct igc_adapter *adapter,
3407                               struct igc_nfc_rule *rule)
3408{
3409        int err;
3410
3411        if (rule->flex) {
3412                return igc_add_flex_filter(adapter, rule);
3413        }
3414
3415        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3416                err = igc_add_etype_filter(adapter, rule->filter.etype,
3417                                           rule->action);
3418                if (err)
3419                        return err;
3420        }
3421
3422        if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
3423                err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3424                                         rule->filter.src_addr, rule->action);
3425                if (err)
3426                        return err;
3427        }
3428
3429        if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
3430                err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3431                                         rule->filter.dst_addr, rule->action);
3432                if (err)
3433                        return err;
3434        }
3435
3436        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3437                int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3438                           VLAN_PRIO_SHIFT;
3439
3440                err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
3441                if (err)
3442                        return err;
3443        }
3444
3445        return 0;
3446}
3447
3448static void igc_disable_nfc_rule(struct igc_adapter *adapter,
3449                                 const struct igc_nfc_rule *rule)
3450{
3451        if (rule->flex) {
3452                igc_del_flex_filter(adapter, rule->filter.flex_index);
3453                return;
3454        }
3455
3456        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
3457                igc_del_etype_filter(adapter, rule->filter.etype);
3458
3459        if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3460                int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3461                           VLAN_PRIO_SHIFT;
3462
3463                igc_del_vlan_prio_filter(adapter, prio);
3464        }
3465
3466        if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3467                igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3468                                   rule->filter.src_addr);
3469
3470        if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3471                igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3472                                   rule->filter.dst_addr);
3473}
3474
3475/**
3476 * igc_get_nfc_rule() - Get NFC rule
3477 * @adapter: Pointer to adapter
3478 * @location: Rule location
3479 *
3480 * Context: Expects adapter->nfc_rule_lock to be held by caller.
3481 *
3482 * Return: Pointer to NFC rule at @location. If not found, NULL.
3483 */
3484struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
3485                                      u32 location)
3486{
3487        struct igc_nfc_rule *rule;
3488
3489        list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
3490                if (rule->location == location)
3491                        return rule;
3492                if (rule->location > location)
3493                        break;
3494        }
3495
3496        return NULL;
3497}
3498
3499/**
3500 * igc_del_nfc_rule() - Delete NFC rule
3501 * @adapter: Pointer to adapter
3502 * @rule: Pointer to rule to be deleted
3503 *
3504 * Disable NFC rule in hardware and delete it from adapter.
3505 *
3506 * Context: Expects adapter->nfc_rule_lock to be held by caller.
3507 */
3508void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3509{
3510        igc_disable_nfc_rule(adapter, rule);
3511
3512        list_del(&rule->list);
3513        adapter->nfc_rule_count--;
3514
3515        kfree(rule);
3516}
3517
3518static void igc_flush_nfc_rules(struct igc_adapter *adapter)
3519{
3520        struct igc_nfc_rule *rule, *tmp;
3521
3522        mutex_lock(&adapter->nfc_rule_lock);
3523
3524        list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list)
3525                igc_del_nfc_rule(adapter, rule);
3526
3527        mutex_unlock(&adapter->nfc_rule_lock);
3528}
3529
3530/**
3531 * igc_add_nfc_rule() - Add NFC rule
3532 * @adapter: Pointer to adapter
3533 * @rule: Pointer to rule to be added
3534 *
3535 * Enable NFC rule in hardware and add it to adapter.
3536 *
3537 * Context: Expects adapter->nfc_rule_lock to be held by caller.
3538 *
3539 * Return: 0 on success, negative errno on failure.
3540 */
3541int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3542{
3543        struct igc_nfc_rule *pred, *cur;
3544        int err;
3545
3546        err = igc_enable_nfc_rule(adapter, rule);
3547        if (err)
3548                return err;
3549
3550        pred = NULL;
3551        list_for_each_entry(cur, &adapter->nfc_rule_list, list) {
3552                if (cur->location >= rule->location)
3553                        break;
3554                pred = cur;
3555        }
3556
3557        list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list);
3558        adapter->nfc_rule_count++;
3559        return 0;
3560}
3561
3562static void igc_restore_nfc_rules(struct igc_adapter *adapter)
3563{
3564        struct igc_nfc_rule *rule;
3565
3566        mutex_lock(&adapter->nfc_rule_lock);
3567
3568        list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
3569                igc_enable_nfc_rule(adapter, rule);
3570
3571        mutex_unlock(&adapter->nfc_rule_lock);
3572}
3573
3574static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
3575{
3576        struct igc_adapter *adapter = netdev_priv(netdev);
3577
3578        return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1);
3579}
3580
3581static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
3582{
3583        struct igc_adapter *adapter = netdev_priv(netdev);
3584
3585        igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
3586        return 0;
3587}
3588
3589/**
3590 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3591 * @netdev: network interface device structure
3592 *
3593 * The set_rx_mode entry point is called whenever the unicast or multicast
3594 * address lists or the network interface flags are updated.  This routine is
3595 * responsible for configuring the hardware for proper unicast, multicast,
3596 * promiscuous mode, and all-multi behavior.
3597 */
3598static void igc_set_rx_mode(struct net_device *netdev)
3599{
3600        struct igc_adapter *adapter = netdev_priv(netdev);
3601        struct igc_hw *hw = &adapter->hw;
3602        u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
3603        int count;
3604
3605        /* Check for Promiscuous and All Multicast modes */
3606        if (netdev->flags & IFF_PROMISC) {
3607                rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
3608        } else {
3609                if (netdev->flags & IFF_ALLMULTI) {
3610                        rctl |= IGC_RCTL_MPE;
3611                } else {
3612                        /* Write addresses to the MTA, if the attempt fails
3613                         * then we should just turn on promiscuous mode so
3614                         * that we can at least receive multicast traffic
3615                         */
3616                        count = igc_write_mc_addr_list(netdev);
3617                        if (count < 0)
3618                                rctl |= IGC_RCTL_MPE;
3619                }
3620        }
3621
3622        /* Write addresses to available RAR registers, if there is not
3623         * sufficient space to store all the addresses then enable
3624         * unicast promiscuous mode
3625         */
3626        if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
3627                rctl |= IGC_RCTL_UPE;
3628
3629        /* update state of unicast and multicast */
3630        rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
3631        wr32(IGC_RCTL, rctl);
3632
3633#if (PAGE_SIZE < 8192)
3634        if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
3635                rlpml = IGC_MAX_FRAME_BUILD_SKB;
3636#endif
3637        wr32(IGC_RLPML, rlpml);
3638}
3639
3640/**
3641 * igc_configure - configure the hardware for RX and TX
3642 * @adapter: private board structure
3643 */
3644static void igc_configure(struct igc_adapter *adapter)
3645{
3646        struct net_device *netdev = adapter->netdev;
3647        int i = 0;
3648
3649        igc_get_hw_control(adapter);
3650        igc_set_rx_mode(netdev);
3651
3652        igc_restore_vlan(adapter);
3653
3654        igc_setup_tctl(adapter);
3655        igc_setup_mrqc(adapter);
3656        igc_setup_rctl(adapter);
3657
3658        igc_set_default_mac_filter(adapter);
3659        igc_restore_nfc_rules(adapter);
3660
3661        igc_configure_tx(adapter);
3662        igc_configure_rx(adapter);
3663
3664        igc_rx_fifo_flush_base(&adapter->hw);
3665
3666        /* call igc_desc_unused which always leaves
3667         * at least 1 descriptor unused to make sure
3668         * next_to_use != next_to_clean
3669         */
3670        for (i = 0; i < adapter->num_rx_queues; i++) {
3671                struct igc_ring *ring = adapter->rx_ring[i];
3672
3673                if (ring->xsk_pool)
3674                        igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
3675                else
3676                        igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
3677        }
3678}
3679
3680/**
3681 * igc_write_ivar - configure ivar for given MSI-X vector
3682 * @hw: pointer to the HW structure
3683 * @msix_vector: vector number we are allocating to a given ring
3684 * @index: row index of IVAR register to write within IVAR table
3685 * @offset: column offset of in IVAR, should be multiple of 8
3686 *
3687 * The IVAR table consists of 2 columns,
3688 * each containing an cause allocation for an Rx and Tx ring, and a
3689 * variable number of rows depending on the number of queues supported.
3690 */
3691static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
3692                           int index, int offset)
3693{
3694        u32 ivar = array_rd32(IGC_IVAR0, index);
3695
3696        /* clear any bits that are currently set */
3697        ivar &= ~((u32)0xFF << offset);
3698
3699        /* write vector and valid bit */
3700        ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
3701
3702        array_wr32(IGC_IVAR0, index, ivar);
3703}
3704
3705static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
3706{
3707        struct igc_adapter *adapter = q_vector->adapter;
3708        struct igc_hw *hw = &adapter->hw;
3709        int rx_queue = IGC_N0_QUEUE;
3710        int tx_queue = IGC_N0_QUEUE;
3711
3712        if (q_vector->rx.ring)
3713                rx_queue = q_vector->rx.ring->reg_idx;
3714        if (q_vector->tx.ring)
3715                tx_queue = q_vector->tx.ring->reg_idx;
3716
3717        switch (hw->mac.type) {
3718        case igc_i225:
3719                if (rx_queue > IGC_N0_QUEUE)
3720                        igc_write_ivar(hw, msix_vector,
3721                                       rx_queue >> 1,
3722                                       (rx_queue & 0x1) << 4);
3723                if (tx_queue > IGC_N0_QUEUE)
3724                        igc_write_ivar(hw, msix_vector,
3725                                       tx_queue >> 1,
3726                                       ((tx_queue & 0x1) << 4) + 8);
3727                q_vector->eims_value = BIT(msix_vector);
3728                break;
3729        default:
3730                WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
3731                break;
3732        }
3733
3734        /* add q_vector eims value to global eims_enable_mask */
3735        adapter->eims_enable_mask |= q_vector->eims_value;
3736
3737        /* configure q_vector to set itr on first interrupt */
3738        q_vector->set_itr = 1;
3739}
3740
3741/**
3742 * igc_configure_msix - Configure MSI-X hardware
3743 * @adapter: Pointer to adapter structure
3744 *
3745 * igc_configure_msix sets up the hardware to properly
3746 * generate MSI-X interrupts.
3747 */
3748static void igc_configure_msix(struct igc_adapter *adapter)
3749{
3750        struct igc_hw *hw = &adapter->hw;
3751        int i, vector = 0;
3752        u32 tmp;
3753
3754        adapter->eims_enable_mask = 0;
3755
3756        /* set vector for other causes, i.e. link changes */
3757        switch (hw->mac.type) {
3758        case igc_i225:
3759                /* Turn on MSI-X capability first, or our settings
3760                 * won't stick.  And it will take days to debug.
3761                 */
3762                wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
3763                     IGC_GPIE_PBA | IGC_GPIE_EIAME |
3764                     IGC_GPIE_NSICR);
3765
3766                /* enable msix_other interrupt */
3767                adapter->eims_other = BIT(vector);
3768                tmp = (vector++ | IGC_IVAR_VALID) << 8;
3769
3770                wr32(IGC_IVAR_MISC, tmp);
3771                break;
3772        default:
3773                /* do nothing, since nothing else supports MSI-X */
3774                break;
3775        } /* switch (hw->mac.type) */
3776
3777        adapter->eims_enable_mask |= adapter->eims_other;
3778
3779        for (i = 0; i < adapter->num_q_vectors; i++)
3780                igc_assign_vector(adapter->q_vector[i], vector++);
3781
3782        wrfl();
3783}
3784
3785/**
3786 * igc_irq_enable - Enable default interrupt generation settings
3787 * @adapter: board private structure
3788 */
3789static void igc_irq_enable(struct igc_adapter *adapter)
3790{
3791        struct igc_hw *hw = &adapter->hw;
3792
3793        if (adapter->msix_entries) {
3794                u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
3795                u32 regval = rd32(IGC_EIAC);
3796
3797                wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
3798                regval = rd32(IGC_EIAM);
3799                wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
3800                wr32(IGC_EIMS, adapter->eims_enable_mask);
3801                wr32(IGC_IMS, ims);
3802        } else {
3803                wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3804                wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3805        }
3806}
3807
3808/**
3809 * igc_irq_disable - Mask off interrupt generation on the NIC
3810 * @adapter: board private structure
3811 */
3812static void igc_irq_disable(struct igc_adapter *adapter)
3813{
3814        struct igc_hw *hw = &adapter->hw;
3815
3816        if (adapter->msix_entries) {
3817                u32 regval = rd32(IGC_EIAM);
3818
3819                wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
3820                wr32(IGC_EIMC, adapter->eims_enable_mask);
3821                regval = rd32(IGC_EIAC);
3822                wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
3823        }
3824
3825        wr32(IGC_IAM, 0);
3826        wr32(IGC_IMC, ~0);
3827        wrfl();
3828
3829        if (adapter->msix_entries) {
3830                int vector = 0, i;
3831
3832                synchronize_irq(adapter->msix_entries[vector++].vector);
3833
3834                for (i = 0; i < adapter->num_q_vectors; i++)
3835                        synchronize_irq(adapter->msix_entries[vector++].vector);
3836        } else {
3837                synchronize_irq(adapter->pdev->irq);
3838        }
3839}
3840
3841void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
3842                              const u32 max_rss_queues)
3843{
3844        /* Determine if we need to pair queues. */
3845        /* If rss_queues > half of max_rss_queues, pair the queues in
3846         * order to conserve interrupts due to limited supply.
3847         */
3848        if (adapter->rss_queues > (max_rss_queues / 2))
3849                adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
3850        else
3851                adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
3852}
3853
3854unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
3855{
3856        return IGC_MAX_RX_QUEUES;
3857}
3858
3859static void igc_init_queue_configuration(struct igc_adapter *adapter)
3860{
3861        u32 max_rss_queues;
3862
3863        max_rss_queues = igc_get_max_rss_queues(adapter);
3864        adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
3865
3866        igc_set_flag_queue_pairs(adapter, max_rss_queues);
3867}
3868
3869/**
3870 * igc_reset_q_vector - Reset config for interrupt vector
3871 * @adapter: board private structure to initialize
3872 * @v_idx: Index of vector to be reset
3873 *
3874 * If NAPI is enabled it will delete any references to the
3875 * NAPI struct. This is preparation for igc_free_q_vector.
3876 */
3877static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
3878{
3879        struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
3880
3881        /* if we're coming from igc_set_interrupt_capability, the vectors are
3882         * not yet allocated
3883         */
3884        if (!q_vector)
3885                return;
3886
3887        if (q_vector->tx.ring)
3888                adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
3889
3890        if (q_vector->rx.ring)
3891                adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
3892
3893        netif_napi_del(&q_vector->napi);
3894}
3895
3896/**
3897 * igc_free_q_vector - Free memory allocated for specific interrupt vector
3898 * @adapter: board private structure to initialize
3899 * @v_idx: Index of vector to be freed
3900 *
3901 * This function frees the memory allocated to the q_vector.
3902 */
3903static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
3904{
3905        struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
3906
3907        adapter->q_vector[v_idx] = NULL;
3908
3909        /* igc_get_stats64() might access the rings on this vector,
3910         * we must wait a grace period before freeing it.
3911         */
3912        if (q_vector)
3913                kfree_rcu(q_vector, rcu);
3914}
3915
3916/**
3917 * igc_free_q_vectors - Free memory allocated for interrupt vectors
3918 * @adapter: board private structure to initialize
3919 *
3920 * This function frees the memory allocated to the q_vectors.  In addition if
3921 * NAPI is enabled it will delete any references to the NAPI struct prior
3922 * to freeing the q_vector.
3923 */
3924static void igc_free_q_vectors(struct igc_adapter *adapter)
3925{
3926        int v_idx = adapter->num_q_vectors;
3927
3928        adapter->num_tx_queues = 0;
3929        adapter->num_rx_queues = 0;
3930        adapter->num_q_vectors = 0;
3931
3932        while (v_idx--) {
3933                igc_reset_q_vector(adapter, v_idx);
3934                igc_free_q_vector(adapter, v_idx);
3935        }
3936}
3937
3938/**
3939 * igc_update_itr - update the dynamic ITR value based on statistics
3940 * @q_vector: pointer to q_vector
3941 * @ring_container: ring info to update the itr for
3942 *
3943 * Stores a new ITR value based on packets and byte
3944 * counts during the last interrupt.  The advantage of per interrupt
3945 * computation is faster updates and more accurate ITR for the current
3946 * traffic pattern.  Constants in this function were computed
3947 * based on theoretical maximum wire speed and thresholds were set based
3948 * on testing data as well as attempting to minimize response time
3949 * while increasing bulk throughput.
3950 * NOTE: These calculations are only valid when operating in a single-
3951 * queue environment.
3952 */
3953static void igc_update_itr(struct igc_q_vector *q_vector,
3954                           struct igc_ring_container *ring_container)
3955{
3956        unsigned int packets = ring_container->total_packets;
3957        unsigned int bytes = ring_container->total_bytes;
3958        u8 itrval = ring_container->itr;
3959
3960        /* no packets, exit with status unchanged */
3961        if (packets == 0)
3962                return;
3963
3964        switch (itrval) {
3965        case lowest_latency:
3966                /* handle TSO and jumbo frames */
3967                if (bytes / packets > 8000)
3968                        itrval = bulk_latency;
3969                else if ((packets < 5) && (bytes > 512))
3970                        itrval = low_latency;
3971                break;
3972        case low_latency:  /* 50 usec aka 20000 ints/s */
3973                if (bytes > 10000) {
3974                        /* this if handles the TSO accounting */
3975                        if (bytes / packets > 8000)
3976                                itrval = bulk_latency;
3977                        else if ((packets < 10) || ((bytes / packets) > 1200))
3978                                itrval = bulk_latency;
3979                        else if ((packets > 35))
3980                                itrval = lowest_latency;
3981                } else if (bytes / packets > 2000) {
3982                        itrval = bulk_latency;
3983                } else if (packets <= 2 && bytes < 512) {
3984                        itrval = lowest_latency;
3985                }
3986                break;
3987        case bulk_latency: /* 250 usec aka 4000 ints/s */
3988                if (bytes > 25000) {
3989                        if (packets > 35)
3990                                itrval = low_latency;
3991                } else if (bytes < 1500) {
3992                        itrval = low_latency;
3993                }
3994                break;
3995        }
3996
3997        /* clear work counters since we have the values we need */
3998        ring_container->total_bytes = 0;
3999        ring_container->total_packets = 0;
4000
4001        /* write updated itr to ring container */
4002        ring_container->itr = itrval;
4003}
4004
4005static void igc_set_itr(struct igc_q_vector *q_vector)
4006{
4007        struct igc_adapter *adapter = q_vector->adapter;
4008        u32 new_itr = q_vector->itr_val;
4009        u8 current_itr = 0;
4010
4011        /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4012        switch (adapter->link_speed) {
4013        case SPEED_10:
4014        case SPEED_100:
4015                current_itr = 0;
4016                new_itr = IGC_4K_ITR;
4017                goto set_itr_now;
4018        default:
4019                break;
4020        }
4021
4022        igc_update_itr(q_vector, &q_vector->tx);
4023        igc_update_itr(q_vector, &q_vector->rx);
4024
4025        current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4026
4027        /* conservative mode (itr 3) eliminates the lowest_latency setting */
4028        if (current_itr == lowest_latency &&
4029            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4030            (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4031                current_itr = low_latency;
4032
4033        switch (current_itr) {
4034        /* counts and packets in update_itr are dependent on these numbers */
4035        case lowest_latency:
4036                new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
4037                break;
4038        case low_latency:
4039                new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
4040                break;
4041        case bulk_latency:
4042                new_itr = IGC_4K_ITR;  /* 4,000 ints/sec */
4043                break;
4044        default:
4045                break;
4046        }
4047
4048set_itr_now:
4049        if (new_itr != q_vector->itr_val) {
4050                /* this attempts to bias the interrupt rate towards Bulk
4051                 * by adding intermediate steps when interrupt rate is
4052                 * increasing
4053                 */
4054                new_itr = new_itr > q_vector->itr_val ?
4055                          max((new_itr * q_vector->itr_val) /
4056                          (new_itr + (q_vector->itr_val >> 2)),
4057                          new_itr) : new_itr;
4058                /* Don't write the value here; it resets the adapter's
4059                 * internal timer, and causes us to delay far longer than
4060                 * we should between interrupts.  Instead, we write the ITR
4061                 * value at the beginning of the next interrupt so the timing
4062                 * ends up being correct.
4063                 */
4064                q_vector->itr_val = new_itr;
4065                q_vector->set_itr = 1;
4066        }
4067}
4068
4069static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
4070{
4071        int v_idx = adapter->num_q_vectors;
4072
4073        if (adapter->msix_entries) {
4074                pci_disable_msix(adapter->pdev);
4075                kfree(adapter->msix_entries);
4076                adapter->msix_entries = NULL;
4077        } else if (adapter->flags & IGC_FLAG_HAS_MSI) {
4078                pci_disable_msi(adapter->pdev);
4079        }
4080
4081        while (v_idx--)
4082                igc_reset_q_vector(adapter, v_idx);
4083}
4084
4085/**
4086 * igc_set_interrupt_capability - set MSI or MSI-X if supported
4087 * @adapter: Pointer to adapter structure
4088 * @msix: boolean value for MSI-X capability
4089 *
4090 * Attempt to configure interrupts using the best available
4091 * capabilities of the hardware and kernel.
4092 */
4093static void igc_set_interrupt_capability(struct igc_adapter *adapter,
4094                                         bool msix)
4095{
4096        int numvecs, i;
4097        int err;
4098
4099        if (!msix)
4100                goto msi_only;
4101        adapter->flags |= IGC_FLAG_HAS_MSIX;
4102
4103        /* Number of supported queues. */
4104        adapter->num_rx_queues = adapter->rss_queues;
4105
4106        adapter->num_tx_queues = adapter->rss_queues;
4107
4108        /* start with one vector for every Rx queue */
4109        numvecs = adapter->num_rx_queues;
4110
4111        /* if Tx handler is separate add 1 for every Tx queue */
4112        if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
4113                numvecs += adapter->num_tx_queues;
4114
4115        /* store the number of vectors reserved for queues */
4116        adapter->num_q_vectors = numvecs;
4117
4118        /* add 1 vector for link status interrupts */
4119        numvecs++;
4120
4121        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
4122                                        GFP_KERNEL);
4123
4124        if (!adapter->msix_entries)
4125                return;
4126
4127        /* populate entry values */
4128        for (i = 0; i < numvecs; i++)
4129                adapter->msix_entries[i].entry = i;
4130
4131        err = pci_enable_msix_range(adapter->pdev,
4132                                    adapter->msix_entries,
4133                                    numvecs,
4134                                    numvecs);
4135        if (err > 0)
4136                return;
4137
4138        kfree(adapter->msix_entries);
4139        adapter->msix_entries = NULL;
4140
4141        igc_reset_interrupt_capability(adapter);
4142
4143msi_only:
4144        adapter->flags &= ~IGC_FLAG_HAS_MSIX;
4145
4146        adapter->rss_queues = 1;
4147        adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
4148        adapter->num_rx_queues = 1;
4149        adapter->num_tx_queues = 1;
4150        adapter->num_q_vectors = 1;
4151        if (!pci_enable_msi(adapter->pdev))
4152                adapter->flags |= IGC_FLAG_HAS_MSI;
4153}
4154
4155/**
4156 * igc_update_ring_itr - update the dynamic ITR value based on packet size
4157 * @q_vector: pointer to q_vector
4158 *
4159 * Stores a new ITR value based on strictly on packet size.  This
4160 * algorithm is less sophisticated than that used in igc_update_itr,
4161 * due to the difficulty of synchronizing statistics across multiple
4162 * receive rings.  The divisors and thresholds used by this function
4163 * were determined based on theoretical maximum wire speed and testing
4164 * data, in order to minimize response time while increasing bulk
4165 * throughput.
4166 * NOTE: This function is called only when operating in a multiqueue
4167 * receive environment.
4168 */
4169static void igc_update_ring_itr(struct igc_q_vector *q_vector)
4170{
4171        struct igc_adapter *adapter = q_vector->adapter;
4172        int new_val = q_vector->itr_val;
4173        int avg_wire_size = 0;
4174        unsigned int packets;
4175
4176        /* For non-gigabit speeds, just fix the interrupt rate at 4000
4177         * ints/sec - ITR timer value of 120 ticks.
4178         */
4179        switch (adapter->link_speed) {
4180        case SPEED_10:
4181        case SPEED_100:
4182                new_val = IGC_4K_ITR;
4183                goto set_itr_val;
4184        default:
4185                break;
4186        }
4187
4188        packets = q_vector->rx.total_packets;
4189        if (packets)
4190                avg_wire_size = q_vector->rx.total_bytes / packets;
4191
4192        packets = q_vector->tx.total_packets;
4193        if (packets)
4194                avg_wire_size = max_t(u32, avg_wire_size,
4195                                      q_vector->tx.total_bytes / packets);
4196
4197        /* if avg_wire_size isn't set no work was done */
4198        if (!avg_wire_size)
4199                goto clear_counts;
4200
4201        /* Add 24 bytes to size to account for CRC, preamble, and gap */
4202        avg_wire_size += 24;
4203
4204        /* Don't starve jumbo frames */
4205        avg_wire_size = min(avg_wire_size, 3000);
4206
4207        /* Give a little boost to mid-size frames */
4208        if (avg_wire_size > 300 && avg_wire_size < 1200)
4209                new_val = avg_wire_size / 3;
4210        else
4211                new_val = avg_wire_size / 2;
4212
4213        /* conservative mode (itr 3) eliminates the lowest_latency setting */
4214        if (new_val < IGC_20K_ITR &&
4215            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4216            (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4217                new_val = IGC_20K_ITR;
4218
4219set_itr_val:
4220        if (new_val != q_vector->itr_val) {
4221                q_vector->itr_val = new_val;
4222                q_vector->set_itr = 1;
4223        }
4224clear_counts:
4225        q_vector->rx.total_bytes = 0;
4226        q_vector->rx.total_packets = 0;
4227        q_vector->tx.total_bytes = 0;
4228        q_vector->tx.total_packets = 0;
4229}
4230
4231static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
4232{
4233        struct igc_adapter *adapter = q_vector->adapter;
4234        struct igc_hw *hw = &adapter->hw;
4235
4236        if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
4237            (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
4238                if (adapter->num_q_vectors == 1)
4239                        igc_set_itr(q_vector);
4240                else
4241                        igc_update_ring_itr(q_vector);
4242        }
4243
4244        if (!test_bit(__IGC_DOWN, &adapter->state)) {
4245                if (adapter->msix_entries)
4246                        wr32(IGC_EIMS, q_vector->eims_value);
4247                else
4248                        igc_irq_enable(adapter);
4249        }
4250}
4251
4252static void igc_add_ring(struct igc_ring *ring,
4253                         struct igc_ring_container *head)
4254{
4255        head->ring = ring;
4256        head->count++;
4257}
4258
4259/**
4260 * igc_cache_ring_register - Descriptor ring to register mapping
4261 * @adapter: board private structure to initialize
4262 *
4263 * Once we know the feature-set enabled for the device, we'll cache
4264 * the register offset the descriptor ring is assigned to.
4265 */
4266static void igc_cache_ring_register(struct igc_adapter *adapter)
4267{
4268        int i = 0, j = 0;
4269
4270        switch (adapter->hw.mac.type) {
4271        case igc_i225:
4272        default:
4273                for (; i < adapter->num_rx_queues; i++)
4274                        adapter->rx_ring[i]->reg_idx = i;
4275                for (; j < adapter->num_tx_queues; j++)
4276                        adapter->tx_ring[j]->reg_idx = j;
4277                break;
4278        }
4279}
4280
4281/**
4282 * igc_poll - NAPI Rx polling callback
4283 * @napi: napi polling structure
4284 * @budget: count of how many packets we should handle
4285 */
4286static int igc_poll(struct napi_struct *napi, int budget)
4287{
4288        struct igc_q_vector *q_vector = container_of(napi,
4289                                                     struct igc_q_vector,
4290                                                     napi);
4291        struct igc_ring *rx_ring = q_vector->rx.ring;
4292        bool clean_complete = true;
4293        int work_done = 0;
4294
4295        if (q_vector->tx.ring)
4296                clean_complete = igc_clean_tx_irq(q_vector, budget);
4297
4298        if (rx_ring) {
4299                int cleaned = rx_ring->xsk_pool ?
4300                              igc_clean_rx_irq_zc(q_vector, budget) :
4301                              igc_clean_rx_irq(q_vector, budget);
4302
4303                work_done += cleaned;
4304                if (cleaned >= budget)
4305                        clean_complete = false;
4306        }
4307
4308        /* If all work not completed, return budget and keep polling */
4309        if (!clean_complete)
4310                return budget;
4311
4312        /* Exit the polling mode, but don't re-enable interrupts if stack might
4313         * poll us due to busy-polling
4314         */
4315        if (likely(napi_complete_done(napi, work_done)))
4316                igc_ring_irq_enable(q_vector);
4317
4318        return min(work_done, budget - 1);
4319}
4320
4321/**
4322 * igc_alloc_q_vector - Allocate memory for a single interrupt vector
4323 * @adapter: board private structure to initialize
4324 * @v_count: q_vectors allocated on adapter, used for ring interleaving
4325 * @v_idx: index of vector in adapter struct
4326 * @txr_count: total number of Tx rings to allocate
4327 * @txr_idx: index of first Tx ring to allocate
4328 * @rxr_count: total number of Rx rings to allocate
4329 * @rxr_idx: index of first Rx ring to allocate
4330 *
4331 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
4332 */
4333static int igc_alloc_q_vector(struct igc_adapter *adapter,
4334                              unsigned int v_count, unsigned int v_idx,
4335                              unsigned int txr_count, unsigned int txr_idx,
4336                              unsigned int rxr_count, unsigned int rxr_idx)
4337{
4338        struct igc_q_vector *q_vector;
4339        struct igc_ring *ring;
4340        int ring_count;
4341
4342        /* igc only supports 1 Tx and/or 1 Rx queue per vector */
4343        if (txr_count > 1 || rxr_count > 1)
4344                return -ENOMEM;
4345
4346        ring_count = txr_count + rxr_count;
4347
4348        /* allocate q_vector and rings */
4349        q_vector = adapter->q_vector[v_idx];
4350        if (!q_vector)
4351                q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
4352                                   GFP_KERNEL);
4353        else
4354                memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
4355        if (!q_vector)
4356                return -ENOMEM;
4357
4358        /* initialize NAPI */
4359        netif_napi_add(adapter->netdev, &q_vector->napi,
4360                       igc_poll, 64);
4361
4362        /* tie q_vector and adapter together */
4363        adapter->q_vector[v_idx] = q_vector;
4364        q_vector->adapter = adapter;
4365
4366        /* initialize work limits */
4367        q_vector->tx.work_limit = adapter->tx_work_limit;
4368
4369        /* initialize ITR configuration */
4370        q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
4371        q_vector->itr_val = IGC_START_ITR;
4372
4373        /* initialize pointer to rings */
4374        ring = q_vector->ring;
4375
4376        /* initialize ITR */
4377        if (rxr_count) {
4378                /* rx or rx/tx vector */
4379                if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
4380                        q_vector->itr_val = adapter->rx_itr_setting;
4381        } else {
4382                /* tx only vector */
4383                if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
4384                        q_vector->itr_val = adapter->tx_itr_setting;
4385        }
4386
4387        if (txr_count) {
4388                /* assign generic ring traits */
4389                ring->dev = &adapter->pdev->dev;
4390                ring->netdev = adapter->netdev;
4391
4392                /* configure backlink on ring */
4393                ring->q_vector = q_vector;
4394
4395                /* update q_vector Tx values */
4396                igc_add_ring(ring, &q_vector->tx);
4397
4398                /* apply Tx specific ring traits */
4399                ring->count = adapter->tx_ring_count;
4400                ring->queue_index = txr_idx;
4401
4402                /* assign ring to adapter */
4403                adapter->tx_ring[txr_idx] = ring;
4404
4405                /* push pointer to next ring */
4406                ring++;
4407        }
4408
4409        if (rxr_count) {
4410                /* assign generic ring traits */
4411                ring->dev = &adapter->pdev->dev;
4412                ring->netdev = adapter->netdev;
4413
4414                /* configure backlink on ring */
4415                ring->q_vector = q_vector;
4416
4417                /* update q_vector Rx values */
4418                igc_add_ring(ring, &q_vector->rx);
4419
4420                /* apply Rx specific ring traits */
4421                ring->count = adapter->rx_ring_count;
4422                ring->queue_index = rxr_idx;
4423
4424                /* assign ring to adapter */
4425                adapter->rx_ring[rxr_idx] = ring;
4426        }
4427
4428        return 0;
4429}
4430
4431/**
4432 * igc_alloc_q_vectors - Allocate memory for interrupt vectors
4433 * @adapter: board private structure to initialize
4434 *
4435 * We allocate one q_vector per queue interrupt.  If allocation fails we
4436 * return -ENOMEM.
4437 */
4438static int igc_alloc_q_vectors(struct igc_adapter *adapter)
4439{
4440        int rxr_remaining = adapter->num_rx_queues;
4441        int txr_remaining = adapter->num_tx_queues;
4442        int rxr_idx = 0, txr_idx = 0, v_idx = 0;
4443        int q_vectors = adapter->num_q_vectors;
4444        int err;
4445
4446        if (q_vectors >= (rxr_remaining + txr_remaining)) {
4447                for (; rxr_remaining; v_idx++) {
4448                        err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4449                                                 0, 0, 1, rxr_idx);
4450
4451                        if (err)
4452                                goto err_out;
4453
4454                        /* update counts and index */
4455                        rxr_remaining--;
4456                        rxr_idx++;
4457                }
4458        }
4459
4460        for (; v_idx < q_vectors; v_idx++) {
4461                int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
4462                int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
4463
4464                err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4465                                         tqpv, txr_idx, rqpv, rxr_idx);
4466
4467                if (err)
4468                        goto err_out;
4469
4470                /* update counts and index */
4471                rxr_remaining -= rqpv;
4472                txr_remaining -= tqpv;
4473                rxr_idx++;
4474                txr_idx++;
4475        }
4476
4477        return 0;
4478
4479err_out:
4480        adapter->num_tx_queues = 0;
4481        adapter->num_rx_queues = 0;
4482        adapter->num_q_vectors = 0;
4483
4484        while (v_idx--)
4485                igc_free_q_vector(adapter, v_idx);
4486
4487        return -ENOMEM;
4488}
4489
4490/**
4491 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
4492 * @adapter: Pointer to adapter structure
4493 * @msix: boolean for MSI-X capability
4494 *
4495 * This function initializes the interrupts and allocates all of the queues.
4496 */
4497static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
4498{
4499        struct net_device *dev = adapter->netdev;
4500        int err = 0;
4501
4502        igc_set_interrupt_capability(adapter, msix);
4503
4504        err = igc_alloc_q_vectors(adapter);
4505        if (err) {
4506                netdev_err(dev, "Unable to allocate memory for vectors\n");
4507                goto err_alloc_q_vectors;
4508        }
4509
4510        igc_cache_ring_register(adapter);
4511
4512        return 0;
4513
4514err_alloc_q_vectors:
4515        igc_reset_interrupt_capability(adapter);
4516        return err;
4517}
4518
4519/**
4520 * igc_sw_init - Initialize general software structures (struct igc_adapter)
4521 * @adapter: board private structure to initialize
4522 *
4523 * igc_sw_init initializes the Adapter private data structure.
4524 * Fields are initialized based on PCI device information and
4525 * OS network device settings (MTU size).
4526 */
4527static int igc_sw_init(struct igc_adapter *adapter)
4528{
4529        struct net_device *netdev = adapter->netdev;
4530        struct pci_dev *pdev = adapter->pdev;
4531        struct igc_hw *hw = &adapter->hw;
4532
4533        pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
4534
4535        /* set default ring sizes */
4536        adapter->tx_ring_count = IGC_DEFAULT_TXD;
4537        adapter->rx_ring_count = IGC_DEFAULT_RXD;
4538
4539        /* set default ITR values */
4540        adapter->rx_itr_setting = IGC_DEFAULT_ITR;
4541        adapter->tx_itr_setting = IGC_DEFAULT_ITR;
4542
4543        /* set default work limits */
4544        adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
4545
4546        /* adjust max frame to be at least the size of a standard frame */
4547        adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
4548                                VLAN_HLEN;
4549        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
4550
4551        mutex_init(&adapter->nfc_rule_lock);
4552        INIT_LIST_HEAD(&adapter->nfc_rule_list);
4553        adapter->nfc_rule_count = 0;
4554
4555        spin_lock_init(&adapter->stats64_lock);
4556        /* Assume MSI-X interrupts, will be checked during IRQ allocation */
4557        adapter->flags |= IGC_FLAG_HAS_MSIX;
4558
4559        igc_init_queue_configuration(adapter);
4560
4561        /* This call may decrease the number of queues */
4562        if (igc_init_interrupt_scheme(adapter, true)) {
4563                netdev_err(netdev, "Unable to allocate memory for queues\n");
4564                return -ENOMEM;
4565        }
4566
4567        /* Explicitly disable IRQ since the NIC can be in any state. */
4568        igc_irq_disable(adapter);
4569
4570        set_bit(__IGC_DOWN, &adapter->state);
4571
4572        return 0;
4573}
4574
4575/**
4576 * igc_up - Open the interface and prepare it to handle traffic
4577 * @adapter: board private structure
4578 */
4579void igc_up(struct igc_adapter *adapter)
4580{
4581        struct igc_hw *hw = &adapter->hw;
4582        int i = 0;
4583
4584        /* hardware has been reset, we need to reload some things */
4585        igc_configure(adapter);
4586
4587        clear_bit(__IGC_DOWN, &adapter->state);
4588
4589        for (i = 0; i < adapter->num_q_vectors; i++)
4590                napi_enable(&adapter->q_vector[i]->napi);
4591
4592        if (adapter->msix_entries)
4593                igc_configure_msix(adapter);
4594        else
4595                igc_assign_vector(adapter->q_vector[0], 0);
4596
4597        /* Clear any pending interrupts. */
4598        rd32(IGC_ICR);
4599        igc_irq_enable(adapter);
4600
4601        netif_tx_start_all_queues(adapter->netdev);
4602
4603        /* start the watchdog. */
4604        hw->mac.get_link_status = true;
4605        schedule_work(&adapter->watchdog_task);
4606}
4607
4608/**
4609 * igc_update_stats - Update the board statistics counters
4610 * @adapter: board private structure
4611 */
4612void igc_update_stats(struct igc_adapter *adapter)
4613{
4614        struct rtnl_link_stats64 *net_stats = &adapter->stats64;
4615        struct pci_dev *pdev = adapter->pdev;
4616        struct igc_hw *hw = &adapter->hw;
4617        u64 _bytes, _packets;
4618        u64 bytes, packets;
4619        unsigned int start;
4620        u32 mpc;
4621        int i;
4622
4623        /* Prevent stats update while adapter is being reset, or if the pci
4624         * connection is down.
4625         */
4626        if (adapter->link_speed == 0)
4627                return;
4628        if (pci_channel_offline(pdev))
4629                return;
4630
4631        packets = 0;
4632        bytes = 0;
4633
4634        rcu_read_lock();
4635        for (i = 0; i < adapter->num_rx_queues; i++) {
4636                struct igc_ring *ring = adapter->rx_ring[i];
4637                u32 rqdpc = rd32(IGC_RQDPC(i));
4638
4639                if (hw->mac.type >= igc_i225)
4640                        wr32(IGC_RQDPC(i), 0);
4641
4642                if (rqdpc) {
4643                        ring->rx_stats.drops += rqdpc;
4644                        net_stats->rx_fifo_errors += rqdpc;
4645                }
4646
4647                do {
4648                        start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
4649                        _bytes = ring->rx_stats.bytes;
4650                        _packets = ring->rx_stats.packets;
4651                } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
4652                bytes += _bytes;
4653                packets += _packets;
4654        }
4655
4656        net_stats->rx_bytes = bytes;
4657        net_stats->rx_packets = packets;
4658
4659        packets = 0;
4660        bytes = 0;
4661        for (i = 0; i < adapter->num_tx_queues; i++) {
4662                struct igc_ring *ring = adapter->tx_ring[i];
4663
4664                do {
4665                        start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
4666                        _bytes = ring->tx_stats.bytes;
4667                        _packets = ring->tx_stats.packets;
4668                } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
4669                bytes += _bytes;
4670                packets += _packets;
4671        }
4672        net_stats->tx_bytes = bytes;
4673        net_stats->tx_packets = packets;
4674        rcu_read_unlock();
4675
4676        /* read stats registers */
4677        adapter->stats.crcerrs += rd32(IGC_CRCERRS);
4678        adapter->stats.gprc += rd32(IGC_GPRC);
4679        adapter->stats.gorc += rd32(IGC_GORCL);
4680        rd32(IGC_GORCH); /* clear GORCL */
4681        adapter->stats.bprc += rd32(IGC_BPRC);
4682        adapter->stats.mprc += rd32(IGC_MPRC);
4683        adapter->stats.roc += rd32(IGC_ROC);
4684
4685        adapter->stats.prc64 += rd32(IGC_PRC64);
4686        adapter->stats.prc127 += rd32(IGC_PRC127);
4687        adapter->stats.prc255 += rd32(IGC_PRC255);
4688        adapter->stats.prc511 += rd32(IGC_PRC511);
4689        adapter->stats.prc1023 += rd32(IGC_PRC1023);
4690        adapter->stats.prc1522 += rd32(IGC_PRC1522);
4691        adapter->stats.tlpic += rd32(IGC_TLPIC);
4692        adapter->stats.rlpic += rd32(IGC_RLPIC);
4693        adapter->stats.hgptc += rd32(IGC_HGPTC);
4694
4695        mpc = rd32(IGC_MPC);
4696        adapter->stats.mpc += mpc;
4697        net_stats->rx_fifo_errors += mpc;
4698        adapter->stats.scc += rd32(IGC_SCC);
4699        adapter->stats.ecol += rd32(IGC_ECOL);
4700        adapter->stats.mcc += rd32(IGC_MCC);
4701        adapter->stats.latecol += rd32(IGC_LATECOL);
4702        adapter->stats.dc += rd32(IGC_DC);
4703        adapter->stats.rlec += rd32(IGC_RLEC);
4704        adapter->stats.xonrxc += rd32(IGC_XONRXC);
4705        adapter->stats.xontxc += rd32(IGC_XONTXC);
4706        adapter->stats.xoffrxc += rd32(IGC_XOFFRXC);
4707        adapter->stats.xofftxc += rd32(IGC_XOFFTXC);
4708        adapter->stats.fcruc += rd32(IGC_FCRUC);
4709        adapter->stats.gptc += rd32(IGC_GPTC);
4710        adapter->stats.gotc += rd32(IGC_GOTCL);
4711        rd32(IGC_GOTCH); /* clear GOTCL */
4712        adapter->stats.rnbc += rd32(IGC_RNBC);
4713        adapter->stats.ruc += rd32(IGC_RUC);
4714        adapter->stats.rfc += rd32(IGC_RFC);
4715        adapter->stats.rjc += rd32(IGC_RJC);
4716        adapter->stats.tor += rd32(IGC_TORH);
4717        adapter->stats.tot += rd32(IGC_TOTH);
4718        adapter->stats.tpr += rd32(IGC_TPR);
4719
4720        adapter->stats.ptc64 += rd32(IGC_PTC64);
4721        adapter->stats.ptc127 += rd32(IGC_PTC127);
4722        adapter->stats.ptc255 += rd32(IGC_PTC255);
4723        adapter->stats.ptc511 += rd32(IGC_PTC511);
4724        adapter->stats.ptc1023 += rd32(IGC_PTC1023);
4725        adapter->stats.ptc1522 += rd32(IGC_PTC1522);
4726
4727        adapter->stats.mptc += rd32(IGC_MPTC);
4728        adapter->stats.bptc += rd32(IGC_BPTC);
4729
4730        adapter->stats.tpt += rd32(IGC_TPT);
4731        adapter->stats.colc += rd32(IGC_COLC);
4732        adapter->stats.colc += rd32(IGC_RERC);
4733
4734        adapter->stats.algnerrc += rd32(IGC_ALGNERRC);
4735
4736        adapter->stats.tsctc += rd32(IGC_TSCTC);
4737
4738        adapter->stats.iac += rd32(IGC_IAC);
4739
4740        /* Fill out the OS statistics structure */
4741        net_stats->multicast = adapter->stats.mprc;
4742        net_stats->collisions = adapter->stats.colc;
4743
4744        /* Rx Errors */
4745
4746        /* RLEC on some newer hardware can be incorrect so build
4747         * our own version based on RUC and ROC
4748         */
4749        net_stats->rx_errors = adapter->stats.rxerrc +
4750                adapter->stats.crcerrs + adapter->stats.algnerrc +
4751                adapter->stats.ruc + adapter->stats.roc +
4752                adapter->stats.cexterr;
4753        net_stats->rx_length_errors = adapter->stats.ruc +
4754                                      adapter->stats.roc;
4755        net_stats->rx_crc_errors = adapter->stats.crcerrs;
4756        net_stats->rx_frame_errors = adapter->stats.algnerrc;
4757        net_stats->rx_missed_errors = adapter->stats.mpc;
4758
4759        /* Tx Errors */
4760        net_stats->tx_errors = adapter->stats.ecol +
4761                               adapter->stats.latecol;
4762        net_stats->tx_aborted_errors = adapter->stats.ecol;
4763        net_stats->tx_window_errors = adapter->stats.latecol;
4764        net_stats->tx_carrier_errors = adapter->stats.tncrs;
4765
4766        /* Tx Dropped needs to be maintained elsewhere */
4767
4768        /* Management Stats */
4769        adapter->stats.mgptc += rd32(IGC_MGTPTC);
4770        adapter->stats.mgprc += rd32(IGC_MGTPRC);
4771        adapter->stats.mgpdc += rd32(IGC_MGTPDC);
4772}
4773
4774/**
4775 * igc_down - Close the interface
4776 * @adapter: board private structure
4777 */
4778void igc_down(struct igc_adapter *adapter)
4779{
4780        struct net_device *netdev = adapter->netdev;
4781        struct igc_hw *hw = &adapter->hw;
4782        u32 tctl, rctl;
4783        int i = 0;
4784
4785        set_bit(__IGC_DOWN, &adapter->state);
4786
4787        igc_ptp_suspend(adapter);
4788
4789        if (pci_device_is_present(adapter->pdev)) {
4790                /* disable receives in the hardware */
4791                rctl = rd32(IGC_RCTL);
4792                wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
4793                /* flush and sleep below */
4794        }
4795        /* set trans_start so we don't get spurious watchdogs during reset */
4796        netif_trans_update(netdev);
4797
4798        netif_carrier_off(netdev);
4799        netif_tx_stop_all_queues(netdev);
4800
4801        if (pci_device_is_present(adapter->pdev)) {
4802                /* disable transmits in the hardware */
4803                tctl = rd32(IGC_TCTL);
4804                tctl &= ~IGC_TCTL_EN;
4805                wr32(IGC_TCTL, tctl);
4806                /* flush both disables and wait for them to finish */
4807                wrfl();
4808                usleep_range(10000, 20000);
4809
4810                igc_irq_disable(adapter);
4811        }
4812
4813        adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
4814
4815        for (i = 0; i < adapter->num_q_vectors; i++) {
4816                if (adapter->q_vector[i]) {
4817                        napi_synchronize(&adapter->q_vector[i]->napi);
4818                        napi_disable(&adapter->q_vector[i]->napi);
4819                }
4820        }
4821
4822        del_timer_sync(&adapter->watchdog_timer);
4823        del_timer_sync(&adapter->phy_info_timer);
4824
4825        /* record the stats before reset*/
4826        spin_lock(&adapter->stats64_lock);
4827        igc_update_stats(adapter);
4828        spin_unlock(&adapter->stats64_lock);
4829
4830        adapter->link_speed = 0;
4831        adapter->link_duplex = 0;
4832
4833        if (!pci_channel_offline(adapter->pdev))
4834                igc_reset(adapter);
4835
4836        /* clear VLAN promisc flag so VFTA will be updated if necessary */
4837        adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
4838
4839        igc_clean_all_tx_rings(adapter);
4840        igc_clean_all_rx_rings(adapter);
4841}
4842
4843void igc_reinit_locked(struct igc_adapter *adapter)
4844{
4845        while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
4846                usleep_range(1000, 2000);
4847        igc_down(adapter);
4848        igc_up(adapter);
4849        clear_bit(__IGC_RESETTING, &adapter->state);
4850}
4851
4852static void igc_reset_task(struct work_struct *work)
4853{
4854        struct igc_adapter *adapter;
4855
4856        adapter = container_of(work, struct igc_adapter, reset_task);
4857
4858        rtnl_lock();
4859        /* If we're already down or resetting, just bail */
4860        if (test_bit(__IGC_DOWN, &adapter->state) ||
4861            test_bit(__IGC_RESETTING, &adapter->state)) {
4862                rtnl_unlock();
4863                return;
4864        }
4865
4866        igc_rings_dump(adapter);
4867        igc_regs_dump(adapter);
4868        netdev_err(adapter->netdev, "Reset adapter\n");
4869        igc_reinit_locked(adapter);
4870        rtnl_unlock();
4871}
4872
4873/**
4874 * igc_change_mtu - Change the Maximum Transfer Unit
4875 * @netdev: network interface device structure
4876 * @new_mtu: new value for maximum frame size
4877 *
4878 * Returns 0 on success, negative on failure
4879 */
4880static int igc_change_mtu(struct net_device *netdev, int new_mtu)
4881{
4882        int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4883        struct igc_adapter *adapter = netdev_priv(netdev);
4884
4885        if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
4886                netdev_dbg(netdev, "Jumbo frames not supported with XDP");
4887                return -EINVAL;
4888        }
4889
4890        /* adjust max frame to be at least the size of a standard frame */
4891        if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
4892                max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
4893
4894        while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
4895                usleep_range(1000, 2000);
4896
4897        /* igc_down has a dependency on max_frame_size */
4898        adapter->max_frame_size = max_frame;
4899
4900        if (netif_running(netdev))
4901                igc_down(adapter);
4902
4903        netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
4904        netdev->mtu = new_mtu;
4905
4906        if (netif_running(netdev))
4907                igc_up(adapter);
4908        else
4909                igc_reset(adapter);
4910
4911        clear_bit(__IGC_RESETTING, &adapter->state);
4912
4913        return 0;
4914}
4915
4916/**
4917 * igc_get_stats64 - Get System Network Statistics
4918 * @netdev: network interface device structure
4919 * @stats: rtnl_link_stats64 pointer
4920 *
4921 * Returns the address of the device statistics structure.
4922 * The statistics are updated here and also from the timer callback.
4923 */
4924static void igc_get_stats64(struct net_device *netdev,
4925                            struct rtnl_link_stats64 *stats)
4926{
4927        struct igc_adapter *adapter = netdev_priv(netdev);
4928
4929        spin_lock(&adapter->stats64_lock);
4930        if (!test_bit(__IGC_RESETTING, &adapter->state))
4931                igc_update_stats(adapter);
4932        memcpy(stats, &adapter->stats64, sizeof(*stats));
4933        spin_unlock(&adapter->stats64_lock);
4934}
4935
4936static netdev_features_t igc_fix_features(struct net_device *netdev,
4937                                          netdev_features_t features)
4938{
4939        /* Since there is no support for separate Rx/Tx vlan accel
4940         * enable/disable make sure Tx flag is always in same state as Rx.
4941         */
4942        if (features & NETIF_F_HW_VLAN_CTAG_RX)
4943                features |= NETIF_F_HW_VLAN_CTAG_TX;
4944        else
4945                features &= ~NETIF_F_HW_VLAN_CTAG_TX;
4946
4947        return features;
4948}
4949
4950static int igc_set_features(struct net_device *netdev,
4951                            netdev_features_t features)
4952{
4953        netdev_features_t changed = netdev->features ^ features;
4954        struct igc_adapter *adapter = netdev_priv(netdev);
4955
4956        if (changed & NETIF_F_HW_VLAN_CTAG_RX)
4957                igc_vlan_mode(netdev, features);
4958
4959        /* Add VLAN support */
4960        if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
4961                return 0;
4962
4963        if (!(features & NETIF_F_NTUPLE))
4964                igc_flush_nfc_rules(adapter);
4965
4966        netdev->features = features;
4967
4968        if (netif_running(netdev))
4969                igc_reinit_locked(adapter);
4970        else
4971                igc_reset(adapter);
4972
4973        return 1;
4974}
4975
4976static netdev_features_t
4977igc_features_check(struct sk_buff *skb, struct net_device *dev,
4978                   netdev_features_t features)
4979{
4980        unsigned int network_hdr_len, mac_hdr_len;
4981
4982        /* Make certain the headers can be described by a context descriptor */
4983        mac_hdr_len = skb_network_header(skb) - skb->data;
4984        if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN))
4985                return features & ~(NETIF_F_HW_CSUM |
4986                                    NETIF_F_SCTP_CRC |
4987                                    NETIF_F_HW_VLAN_CTAG_TX |
4988                                    NETIF_F_TSO |
4989                                    NETIF_F_TSO6);
4990
4991        network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
4992        if (unlikely(network_hdr_len >  IGC_MAX_NETWORK_HDR_LEN))
4993                return features & ~(NETIF_F_HW_CSUM |
4994                                    NETIF_F_SCTP_CRC |
4995                                    NETIF_F_TSO |
4996                                    NETIF_F_TSO6);
4997
4998        /* We can only support IPv4 TSO in tunnels if we can mangle the
4999         * inner IP ID field, so strip TSO if MANGLEID is not supported.
5000         */
5001        if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
5002                features &= ~NETIF_F_TSO;
5003
5004        return features;
5005}
5006
5007static void igc_tsync_interrupt(struct igc_adapter *adapter)
5008{
5009        u32 ack, tsauxc, sec, nsec, tsicr;
5010        struct igc_hw *hw = &adapter->hw;
5011        struct ptp_clock_event event;
5012        struct timespec64 ts;
5013
5014        tsicr = rd32(IGC_TSICR);
5015        ack = 0;
5016
5017        if (tsicr & IGC_TSICR_SYS_WRAP) {
5018                event.type = PTP_CLOCK_PPS;
5019                if (adapter->ptp_caps.pps)
5020                        ptp_clock_event(adapter->ptp_clock, &event);
5021                ack |= IGC_TSICR_SYS_WRAP;
5022        }
5023
5024        if (tsicr & IGC_TSICR_TXTS) {
5025                /* retrieve hardware timestamp */
5026                schedule_work(&adapter->ptp_tx_work);
5027                ack |= IGC_TSICR_TXTS;
5028        }
5029
5030        if (tsicr & IGC_TSICR_TT0) {
5031                spin_lock(&adapter->tmreg_lock);
5032                ts = timespec64_add(adapter->perout[0].start,
5033                                    adapter->perout[0].period);
5034                wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
5035                wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec);
5036                tsauxc = rd32(IGC_TSAUXC);
5037                tsauxc |= IGC_TSAUXC_EN_TT0;
5038                wr32(IGC_TSAUXC, tsauxc);
5039                adapter->perout[0].start = ts;
5040                spin_unlock(&adapter->tmreg_lock);
5041                ack |= IGC_TSICR_TT0;
5042        }
5043
5044        if (tsicr & IGC_TSICR_TT1) {
5045                spin_lock(&adapter->tmreg_lock);
5046                ts = timespec64_add(adapter->perout[1].start,
5047                                    adapter->perout[1].period);
5048                wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
5049                wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec);
5050                tsauxc = rd32(IGC_TSAUXC);
5051                tsauxc |= IGC_TSAUXC_EN_TT1;
5052                wr32(IGC_TSAUXC, tsauxc);
5053                adapter->perout[1].start = ts;
5054                spin_unlock(&adapter->tmreg_lock);
5055                ack |= IGC_TSICR_TT1;
5056        }
5057
5058        if (tsicr & IGC_TSICR_AUTT0) {
5059                nsec = rd32(IGC_AUXSTMPL0);
5060                sec  = rd32(IGC_AUXSTMPH0);
5061                event.type = PTP_CLOCK_EXTTS;
5062                event.index = 0;
5063                event.timestamp = sec * NSEC_PER_SEC + nsec;
5064                ptp_clock_event(adapter->ptp_clock, &event);
5065                ack |= IGC_TSICR_AUTT0;
5066        }
5067
5068        if (tsicr & IGC_TSICR_AUTT1) {
5069                nsec = rd32(IGC_AUXSTMPL1);
5070                sec  = rd32(IGC_AUXSTMPH1);
5071                event.type = PTP_CLOCK_EXTTS;
5072                event.index = 1;
5073                event.timestamp = sec * NSEC_PER_SEC + nsec;
5074                ptp_clock_event(adapter->ptp_clock, &event);
5075                ack |= IGC_TSICR_AUTT1;
5076        }
5077
5078        /* acknowledge the interrupts */
5079        wr32(IGC_TSICR, ack);
5080}
5081
5082/**
5083 * igc_msix_other - msix other interrupt handler
5084 * @irq: interrupt number
5085 * @data: pointer to a q_vector
5086 */
5087static irqreturn_t igc_msix_other(int irq, void *data)
5088{
5089        struct igc_adapter *adapter = data;
5090        struct igc_hw *hw = &adapter->hw;
5091        u32 icr = rd32(IGC_ICR);
5092
5093        /* reading ICR causes bit 31 of EICR to be cleared */
5094        if (icr & IGC_ICR_DRSTA)
5095                schedule_work(&adapter->reset_task);
5096
5097        if (icr & IGC_ICR_DOUTSYNC) {
5098                /* HW is reporting DMA is out of sync */
5099                adapter->stats.doosync++;
5100        }
5101
5102        if (icr & IGC_ICR_LSC) {
5103                hw->mac.get_link_status = true;
5104                /* guard against interrupt when we're going down */
5105                if (!test_bit(__IGC_DOWN, &adapter->state))
5106                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5107        }
5108
5109        if (icr & IGC_ICR_TS)
5110                igc_tsync_interrupt(adapter);
5111
5112        wr32(IGC_EIMS, adapter->eims_other);
5113
5114        return IRQ_HANDLED;
5115}
5116
5117static void igc_write_itr(struct igc_q_vector *q_vector)
5118{
5119        u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
5120
5121        if (!q_vector->set_itr)
5122                return;
5123
5124        if (!itr_val)
5125                itr_val = IGC_ITR_VAL_MASK;
5126
5127        itr_val |= IGC_EITR_CNT_IGNR;
5128
5129        writel(itr_val, q_vector->itr_register);
5130        q_vector->set_itr = 0;
5131}
5132
5133static irqreturn_t igc_msix_ring(int irq, void *data)
5134{
5135        struct igc_q_vector *q_vector = data;
5136
5137        /* Write the ITR value calculated from the previous interrupt. */
5138        igc_write_itr(q_vector);
5139
5140        napi_schedule(&q_vector->napi);
5141
5142        return IRQ_HANDLED;
5143}
5144
5145/**
5146 * igc_request_msix - Initialize MSI-X interrupts
5147 * @adapter: Pointer to adapter structure
5148 *
5149 * igc_request_msix allocates MSI-X vectors and requests interrupts from the
5150 * kernel.
5151 */
5152static int igc_request_msix(struct igc_adapter *adapter)
5153{
5154        unsigned int num_q_vectors = adapter->num_q_vectors;
5155        int i = 0, err = 0, vector = 0, free_vector = 0;
5156        struct net_device *netdev = adapter->netdev;
5157
5158        err = request_irq(adapter->msix_entries[vector].vector,
5159                          &igc_msix_other, 0, netdev->name, adapter);
5160        if (err)
5161                goto err_out;
5162
5163        if (num_q_vectors > MAX_Q_VECTORS) {
5164                num_q_vectors = MAX_Q_VECTORS;
5165                dev_warn(&adapter->pdev->dev,
5166                         "The number of queue vectors (%d) is higher than max allowed (%d)\n",
5167                         adapter->num_q_vectors, MAX_Q_VECTORS);
5168        }
5169        for (i = 0; i < num_q_vectors; i++) {
5170                struct igc_q_vector *q_vector = adapter->q_vector[i];
5171
5172                vector++;
5173
5174                q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
5175
5176                if (q_vector->rx.ring && q_vector->tx.ring)
5177                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
5178                                q_vector->rx.ring->queue_index);
5179                else if (q_vector->tx.ring)
5180                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
5181                                q_vector->tx.ring->queue_index);
5182                else if (q_vector->rx.ring)
5183                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
5184                                q_vector->rx.ring->queue_index);
5185                else
5186                        sprintf(q_vector->name, "%s-unused", netdev->name);
5187
5188                err = request_irq(adapter->msix_entries[vector].vector,
5189                                  igc_msix_ring, 0, q_vector->name,
5190                                  q_vector);
5191                if (err)
5192                        goto err_free;
5193        }
5194
5195        igc_configure_msix(adapter);
5196        return 0;
5197
5198err_free:
5199        /* free already assigned IRQs */
5200        free_irq(adapter->msix_entries[free_vector++].vector, adapter);
5201
5202        vector--;
5203        for (i = 0; i < vector; i++) {
5204                free_irq(adapter->msix_entries[free_vector++].vector,
5205                         adapter->q_vector[i]);
5206        }
5207err_out:
5208        return err;
5209}
5210
5211/**
5212 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
5213 * @adapter: Pointer to adapter structure
5214 *
5215 * This function resets the device so that it has 0 rx queues, tx queues, and
5216 * MSI-X interrupts allocated.
5217 */
5218static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
5219{
5220        igc_free_q_vectors(adapter);
5221        igc_reset_interrupt_capability(adapter);
5222}
5223
5224/* Need to wait a few seconds after link up to get diagnostic information from
5225 * the phy
5226 */
5227static void igc_update_phy_info(struct timer_list *t)
5228{
5229        struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
5230
5231        igc_get_phy_info(&adapter->hw);
5232}
5233
5234/**
5235 * igc_has_link - check shared code for link and determine up/down
5236 * @adapter: pointer to driver private info
5237 */
5238bool igc_has_link(struct igc_adapter *adapter)
5239{
5240        struct igc_hw *hw = &adapter->hw;
5241        bool link_active = false;
5242
5243        /* get_link_status is set on LSC (link status) interrupt or
5244         * rx sequence error interrupt.  get_link_status will stay
5245         * false until the igc_check_for_link establishes link
5246         * for copper adapters ONLY
5247         */
5248        if (!hw->mac.get_link_status)
5249                return true;
5250        hw->mac.ops.check_for_link(hw);
5251        link_active = !hw->mac.get_link_status;
5252
5253        if (hw->mac.type == igc_i225) {
5254                if (!netif_carrier_ok(adapter->netdev)) {
5255                        adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5256                } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
5257                        adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
5258                        adapter->link_check_timeout = jiffies;
5259                }
5260        }
5261
5262        return link_active;
5263}
5264
5265/**
5266 * igc_watchdog - Timer Call-back
5267 * @t: timer for the watchdog
5268 */
5269static void igc_watchdog(struct timer_list *t)
5270{
5271        struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
5272        /* Do the rest outside of interrupt context */
5273        schedule_work(&adapter->watchdog_task);
5274}
5275
5276static void igc_watchdog_task(struct work_struct *work)
5277{
5278        struct igc_adapter *adapter = container_of(work,
5279                                                   struct igc_adapter,
5280                                                   watchdog_task);
5281        struct net_device *netdev = adapter->netdev;
5282        struct igc_hw *hw = &adapter->hw;
5283        struct igc_phy_info *phy = &hw->phy;
5284        u16 phy_data, retry_count = 20;
5285        u32 link;
5286        int i;
5287
5288        link = igc_has_link(adapter);
5289
5290        if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
5291                if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
5292                        adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5293                else
5294                        link = false;
5295        }
5296
5297        if (link) {
5298                /* Cancel scheduled suspend requests. */
5299                pm_runtime_resume(netdev->dev.parent);
5300
5301                if (!netif_carrier_ok(netdev)) {
5302                        u32 ctrl;
5303
5304                        hw->mac.ops.get_speed_and_duplex(hw,
5305                                                         &adapter->link_speed,
5306                                                         &adapter->link_duplex);
5307
5308                        ctrl = rd32(IGC_CTRL);
5309                        /* Link status message must follow this format */
5310                        netdev_info(netdev,
5311                                    "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
5312                                    adapter->link_speed,
5313                                    adapter->link_duplex == FULL_DUPLEX ?
5314                                    "Full" : "Half",
5315                                    (ctrl & IGC_CTRL_TFCE) &&
5316                                    (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
5317                                    (ctrl & IGC_CTRL_RFCE) ?  "RX" :
5318                                    (ctrl & IGC_CTRL_TFCE) ?  "TX" : "None");
5319
5320                        /* disable EEE if enabled */
5321                        if ((adapter->flags & IGC_FLAG_EEE) &&
5322                            adapter->link_duplex == HALF_DUPLEX) {
5323                                netdev_info(netdev,
5324                                            "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n");
5325                                adapter->hw.dev_spec._base.eee_enable = false;
5326                                adapter->flags &= ~IGC_FLAG_EEE;
5327                        }
5328
5329                        /* check if SmartSpeed worked */
5330                        igc_check_downshift(hw);
5331                        if (phy->speed_downgraded)
5332                                netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
5333
5334                        /* adjust timeout factor according to speed/duplex */
5335                        adapter->tx_timeout_factor = 1;
5336                        switch (adapter->link_speed) {
5337                        case SPEED_10:
5338                                adapter->tx_timeout_factor = 14;
5339                                break;
5340                        case SPEED_100:
5341                        case SPEED_1000:
5342                        case SPEED_2500:
5343                                adapter->tx_timeout_factor = 7;
5344                                break;
5345                        }
5346
5347                        if (adapter->link_speed != SPEED_1000)
5348                                goto no_wait;
5349
5350                        /* wait for Remote receiver status OK */
5351retry_read_status:
5352                        if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
5353                                              &phy_data)) {
5354                                if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
5355                                    retry_count) {
5356                                        msleep(100);
5357                                        retry_count--;
5358                                        goto retry_read_status;
5359                                } else if (!retry_count) {
5360                                        netdev_err(netdev, "exceed max 2 second\n");
5361                                }
5362                        } else {
5363                                netdev_err(netdev, "read 1000Base-T Status Reg\n");
5364                        }
5365no_wait:
5366                        netif_carrier_on(netdev);
5367
5368                        /* link state has changed, schedule phy info update */
5369                        if (!test_bit(__IGC_DOWN, &adapter->state))
5370                                mod_timer(&adapter->phy_info_timer,
5371                                          round_jiffies(jiffies + 2 * HZ));
5372                }
5373        } else {
5374                if (netif_carrier_ok(netdev)) {
5375                        adapter->link_speed = 0;
5376                        adapter->link_duplex = 0;
5377
5378                        /* Links status message must follow this format */
5379                        netdev_info(netdev, "NIC Link is Down\n");
5380                        netif_carrier_off(netdev);
5381
5382                        /* link state has changed, schedule phy info update */
5383                        if (!test_bit(__IGC_DOWN, &adapter->state))
5384                                mod_timer(&adapter->phy_info_timer,
5385                                          round_jiffies(jiffies + 2 * HZ));
5386
5387                        /* link is down, time to check for alternate media */
5388                        if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
5389                                if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
5390                                        schedule_work(&adapter->reset_task);
5391                                        /* return immediately */
5392                                        return;
5393                                }
5394                        }
5395                        pm_schedule_suspend(netdev->dev.parent,
5396                                            MSEC_PER_SEC * 5);
5397
5398                /* also check for alternate media here */
5399                } else if (!netif_carrier_ok(netdev) &&
5400                           (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
5401                        if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
5402                                schedule_work(&adapter->reset_task);
5403                                /* return immediately */
5404                                return;
5405                        }
5406                }
5407        }
5408
5409        spin_lock(&adapter->stats64_lock);
5410        igc_update_stats(adapter);
5411        spin_unlock(&adapter->stats64_lock);
5412
5413        for (i = 0; i < adapter->num_tx_queues; i++) {
5414                struct igc_ring *tx_ring = adapter->tx_ring[i];
5415
5416                if (!netif_carrier_ok(netdev)) {
5417                        /* We've lost link, so the controller stops DMA,
5418                         * but we've got queued Tx work that's never going
5419                         * to get done, so reset controller to flush Tx.
5420                         * (Do the reset outside of interrupt context).
5421                         */
5422                        if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
5423                                adapter->tx_timeout_count++;
5424                                schedule_work(&adapter->reset_task);
5425                                /* return immediately since reset is imminent */
5426                                return;
5427                        }
5428                }
5429
5430                /* Force detection of hung controller every watchdog period */
5431                set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5432        }
5433
5434        /* Cause software interrupt to ensure Rx ring is cleaned */
5435        if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5436                u32 eics = 0;
5437
5438                for (i = 0; i < adapter->num_q_vectors; i++)
5439                        eics |= adapter->q_vector[i]->eims_value;
5440                wr32(IGC_EICS, eics);
5441        } else {
5442                wr32(IGC_ICS, IGC_ICS_RXDMT0);
5443        }
5444
5445        igc_ptp_tx_hang(adapter);
5446
5447        /* Reset the timer */
5448        if (!test_bit(__IGC_DOWN, &adapter->state)) {
5449                if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
5450                        mod_timer(&adapter->watchdog_timer,
5451                                  round_jiffies(jiffies +  HZ));
5452                else
5453                        mod_timer(&adapter->watchdog_timer,
5454                                  round_jiffies(jiffies + 2 * HZ));
5455        }
5456}
5457
5458/**
5459 * igc_intr_msi - Interrupt Handler
5460 * @irq: interrupt number
5461 * @data: pointer to a network interface device structure
5462 */
5463static irqreturn_t igc_intr_msi(int irq, void *data)
5464{
5465        struct igc_adapter *adapter = data;
5466        struct igc_q_vector *q_vector = adapter->q_vector[0];
5467        struct igc_hw *hw = &adapter->hw;
5468        /* read ICR disables interrupts using IAM */
5469        u32 icr = rd32(IGC_ICR);
5470
5471        igc_write_itr(q_vector);
5472
5473        if (icr & IGC_ICR_DRSTA)
5474                schedule_work(&adapter->reset_task);
5475
5476        if (icr & IGC_ICR_DOUTSYNC) {
5477                /* HW is reporting DMA is out of sync */
5478                adapter->stats.doosync++;
5479        }
5480
5481        if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5482                hw->mac.get_link_status = true;
5483                if (!test_bit(__IGC_DOWN, &adapter->state))
5484                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5485        }
5486
5487        if (icr & IGC_ICR_TS)
5488                igc_tsync_interrupt(adapter);
5489
5490        napi_schedule(&q_vector->napi);
5491
5492        return IRQ_HANDLED;
5493}
5494
5495/**
5496 * igc_intr - Legacy Interrupt Handler
5497 * @irq: interrupt number
5498 * @data: pointer to a network interface device structure
5499 */
5500static irqreturn_t igc_intr(int irq, void *data)
5501{
5502        struct igc_adapter *adapter = data;
5503        struct igc_q_vector *q_vector = adapter->q_vector[0];
5504        struct igc_hw *hw = &adapter->hw;
5505        /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5506         * need for the IMC write
5507         */
5508        u32 icr = rd32(IGC_ICR);
5509
5510        /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5511         * not set, then the adapter didn't send an interrupt
5512         */
5513        if (!(icr & IGC_ICR_INT_ASSERTED))
5514                return IRQ_NONE;
5515
5516        igc_write_itr(q_vector);
5517
5518        if (icr & IGC_ICR_DRSTA)
5519                schedule_work(&adapter->reset_task);
5520
5521        if (icr & IGC_ICR_DOUTSYNC) {
5522                /* HW is reporting DMA is out of sync */
5523                adapter->stats.doosync++;
5524        }
5525
5526        if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5527                hw->mac.get_link_status = true;
5528                /* guard against interrupt when we're going down */
5529                if (!test_bit(__IGC_DOWN, &adapter->state))
5530                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5531        }
5532
5533        if (icr & IGC_ICR_TS)
5534                igc_tsync_interrupt(adapter);
5535
5536        napi_schedule(&q_vector->napi);
5537
5538        return IRQ_HANDLED;
5539}
5540
5541static void igc_free_irq(struct igc_adapter *adapter)
5542{
5543        if (adapter->msix_entries) {
5544                int vector = 0, i;
5545
5546                free_irq(adapter->msix_entries[vector++].vector, adapter);
5547
5548                for (i = 0; i < adapter->num_q_vectors; i++)
5549                        free_irq(adapter->msix_entries[vector++].vector,
5550                                 adapter->q_vector[i]);
5551        } else {
5552                free_irq(adapter->pdev->irq, adapter);
5553        }
5554}
5555
5556/**
5557 * igc_request_irq - initialize interrupts
5558 * @adapter: Pointer to adapter structure
5559 *
5560 * Attempts to configure interrupts using the best available
5561 * capabilities of the hardware and kernel.
5562 */
5563static int igc_request_irq(struct igc_adapter *adapter)
5564{
5565        struct net_device *netdev = adapter->netdev;
5566        struct pci_dev *pdev = adapter->pdev;
5567        int err = 0;
5568
5569        if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5570                err = igc_request_msix(adapter);
5571                if (!err)
5572                        goto request_done;
5573                /* fall back to MSI */
5574                igc_free_all_tx_resources(adapter);
5575                igc_free_all_rx_resources(adapter);
5576
5577                igc_clear_interrupt_scheme(adapter);
5578                err = igc_init_interrupt_scheme(adapter, false);
5579                if (err)
5580                        goto request_done;
5581                igc_setup_all_tx_resources(adapter);
5582                igc_setup_all_rx_resources(adapter);
5583                igc_configure(adapter);
5584        }
5585
5586        igc_assign_vector(adapter->q_vector[0], 0);
5587
5588        if (adapter->flags & IGC_FLAG_HAS_MSI) {
5589                err = request_irq(pdev->irq, &igc_intr_msi, 0,
5590                                  netdev->name, adapter);
5591                if (!err)
5592                        goto request_done;
5593
5594                /* fall back to legacy interrupts */
5595                igc_reset_interrupt_capability(adapter);
5596                adapter->flags &= ~IGC_FLAG_HAS_MSI;
5597        }
5598
5599        err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
5600                          netdev->name, adapter);
5601
5602        if (err)
5603                netdev_err(netdev, "Error %d getting interrupt\n", err);
5604
5605request_done:
5606        return err;
5607}
5608
5609/**
5610 * __igc_open - Called when a network interface is made active
5611 * @netdev: network interface device structure
5612 * @resuming: boolean indicating if the device is resuming
5613 *
5614 * Returns 0 on success, negative value on failure
5615 *
5616 * The open entry point is called when a network interface is made
5617 * active by the system (IFF_UP).  At this point all resources needed
5618 * for transmit and receive operations are allocated, the interrupt
5619 * handler is registered with the OS, the watchdog timer is started,
5620 * and the stack is notified that the interface is ready.
5621 */
5622static int __igc_open(struct net_device *netdev, bool resuming)
5623{
5624        struct igc_adapter *adapter = netdev_priv(netdev);
5625        struct pci_dev *pdev = adapter->pdev;
5626        struct igc_hw *hw = &adapter->hw;
5627        int err = 0;
5628        int i = 0;
5629
5630        /* disallow open during test */
5631
5632        if (test_bit(__IGC_TESTING, &adapter->state)) {
5633                WARN_ON(resuming);
5634                return -EBUSY;
5635        }
5636
5637        if (!resuming)
5638                pm_runtime_get_sync(&pdev->dev);
5639
5640        netif_carrier_off(netdev);
5641
5642        /* allocate transmit descriptors */
5643        err = igc_setup_all_tx_resources(adapter);
5644        if (err)
5645                goto err_setup_tx;
5646
5647        /* allocate receive descriptors */
5648        err = igc_setup_all_rx_resources(adapter);
5649        if (err)
5650                goto err_setup_rx;
5651
5652        igc_power_up_link(adapter);
5653
5654        igc_configure(adapter);
5655
5656        err = igc_request_irq(adapter);
5657        if (err)
5658                goto err_req_irq;
5659
5660        /* Notify the stack of the actual queue counts. */
5661        err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
5662        if (err)
5663                goto err_set_queues;
5664
5665        err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
5666        if (err)
5667                goto err_set_queues;
5668
5669        clear_bit(__IGC_DOWN, &adapter->state);
5670
5671        for (i = 0; i < adapter->num_q_vectors; i++)
5672                napi_enable(&adapter->q_vector[i]->napi);
5673
5674        /* Clear any pending interrupts. */
5675        rd32(IGC_ICR);
5676        igc_irq_enable(adapter);
5677
5678        if (!resuming)
5679                pm_runtime_put(&pdev->dev);
5680
5681        netif_tx_start_all_queues(netdev);
5682
5683        /* start the watchdog. */
5684        hw->mac.get_link_status = true;
5685        schedule_work(&adapter->watchdog_task);
5686
5687        return IGC_SUCCESS;
5688
5689err_set_queues:
5690        igc_free_irq(adapter);
5691err_req_irq:
5692        igc_release_hw_control(adapter);
5693        igc_power_down_phy_copper_base(&adapter->hw);
5694        igc_free_all_rx_resources(adapter);
5695err_setup_rx:
5696        igc_free_all_tx_resources(adapter);
5697err_setup_tx:
5698        igc_reset(adapter);
5699        if (!resuming)
5700                pm_runtime_put(&pdev->dev);
5701
5702        return err;
5703}
5704
5705int igc_open(struct net_device *netdev)
5706{
5707        return __igc_open(netdev, false);
5708}
5709
5710/**
5711 * __igc_close - Disables a network interface
5712 * @netdev: network interface device structure
5713 * @suspending: boolean indicating the device is suspending
5714 *
5715 * Returns 0, this is not allowed to fail
5716 *
5717 * The close entry point is called when an interface is de-activated
5718 * by the OS.  The hardware is still under the driver's control, but
5719 * needs to be disabled.  A global MAC reset is issued to stop the
5720 * hardware, and all transmit and receive resources are freed.
5721 */
5722static int __igc_close(struct net_device *netdev, bool suspending)
5723{
5724        struct igc_adapter *adapter = netdev_priv(netdev);
5725        struct pci_dev *pdev = adapter->pdev;
5726
5727        WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
5728
5729        if (!suspending)
5730                pm_runtime_get_sync(&pdev->dev);
5731
5732        igc_down(adapter);
5733
5734        igc_release_hw_control(adapter);
5735
5736        igc_free_irq(adapter);
5737
5738        igc_free_all_tx_resources(adapter);
5739        igc_free_all_rx_resources(adapter);
5740
5741        if (!suspending)
5742                pm_runtime_put_sync(&pdev->dev);
5743
5744        return 0;
5745}
5746
5747int igc_close(struct net_device *netdev)
5748{
5749        if (netif_device_present(netdev) || netdev->dismantle)
5750                return __igc_close(netdev, false);
5751        return 0;
5752}
5753
5754/**
5755 * igc_ioctl - Access the hwtstamp interface
5756 * @netdev: network interface device structure
5757 * @ifr: interface request data
5758 * @cmd: ioctl command
5759 **/
5760static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5761{
5762        switch (cmd) {
5763        case SIOCGHWTSTAMP:
5764                return igc_ptp_get_ts_config(netdev, ifr);
5765        case SIOCSHWTSTAMP:
5766                return igc_ptp_set_ts_config(netdev, ifr);
5767        default:
5768                return -EOPNOTSUPP;
5769        }
5770}
5771
5772static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
5773                                      bool enable)
5774{
5775        struct igc_ring *ring;
5776
5777        if (queue < 0 || queue >= adapter->num_tx_queues)
5778                return -EINVAL;
5779
5780        ring = adapter->tx_ring[queue];
5781        ring->launchtime_enable = enable;
5782
5783        return 0;
5784}
5785
5786static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now)
5787{
5788        struct timespec64 b;
5789
5790        b = ktime_to_timespec64(base_time);
5791
5792        return timespec64_compare(now, &b) > 0;
5793}
5794
5795static bool validate_schedule(struct igc_adapter *adapter,
5796                              const struct tc_taprio_qopt_offload *qopt)
5797{
5798        int queue_uses[IGC_MAX_TX_QUEUES] = { };
5799        struct timespec64 now;
5800        size_t n;
5801
5802        if (qopt->cycle_time_extension)
5803                return false;
5804
5805        igc_ptp_read(adapter, &now);
5806
5807        /* If we program the controller's BASET registers with a time
5808         * in the future, it will hold all the packets until that
5809         * time, causing a lot of TX Hangs, so to avoid that, we
5810         * reject schedules that would start in the future.
5811         */
5812        if (!is_base_time_past(qopt->base_time, &now))
5813                return false;
5814
5815        for (n = 0; n < qopt->num_entries; n++) {
5816                const struct tc_taprio_sched_entry *e;
5817                int i;
5818
5819                e = &qopt->entries[n];
5820
5821                /* i225 only supports "global" frame preemption
5822                 * settings.
5823                 */
5824                if (e->command != TC_TAPRIO_CMD_SET_GATES)
5825                        return false;
5826
5827                for (i = 0; i < adapter->num_tx_queues; i++) {
5828                        if (e->gate_mask & BIT(i))
5829                                queue_uses[i]++;
5830
5831                        if (queue_uses[i] > 1)
5832                                return false;
5833                }
5834        }
5835
5836        return true;
5837}
5838
5839static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
5840                                     struct tc_etf_qopt_offload *qopt)
5841{
5842        struct igc_hw *hw = &adapter->hw;
5843        int err;
5844
5845        if (hw->mac.type != igc_i225)
5846                return -EOPNOTSUPP;
5847
5848        err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable);
5849        if (err)
5850                return err;
5851
5852        return igc_tsn_offload_apply(adapter);
5853}
5854
5855static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
5856{
5857        int i;
5858
5859        adapter->base_time = 0;
5860        adapter->cycle_time = NSEC_PER_SEC;
5861
5862        for (i = 0; i < adapter->num_tx_queues; i++) {
5863                struct igc_ring *ring = adapter->tx_ring[i];
5864
5865                ring->start_time = 0;
5866                ring->end_time = NSEC_PER_SEC;
5867        }
5868
5869        return 0;
5870}
5871
5872static int igc_save_qbv_schedule(struct igc_adapter *adapter,
5873                                 struct tc_taprio_qopt_offload *qopt)
5874{
5875        u32 start_time = 0, end_time = 0;
5876        size_t n;
5877
5878        if (!qopt->enable)
5879                return igc_tsn_clear_schedule(adapter);
5880
5881        if (adapter->base_time)
5882                return -EALREADY;
5883
5884        if (!validate_schedule(adapter, qopt))
5885                return -EINVAL;
5886
5887        adapter->cycle_time = qopt->cycle_time;
5888        adapter->base_time = qopt->base_time;
5889
5890        /* FIXME: be a little smarter about cases when the gate for a
5891         * queue stays open for more than one entry.
5892         */
5893        for (n = 0; n < qopt->num_entries; n++) {
5894                struct tc_taprio_sched_entry *e = &qopt->entries[n];
5895                int i;
5896
5897                end_time += e->interval;
5898
5899                for (i = 0; i < adapter->num_tx_queues; i++) {
5900                        struct igc_ring *ring = adapter->tx_ring[i];
5901
5902                        if (!(e->gate_mask & BIT(i)))
5903                                continue;
5904
5905                        ring->start_time = start_time;
5906                        ring->end_time = end_time;
5907                }
5908
5909                start_time += e->interval;
5910        }
5911
5912        return 0;
5913}
5914
5915static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
5916                                         struct tc_taprio_qopt_offload *qopt)
5917{
5918        struct igc_hw *hw = &adapter->hw;
5919        int err;
5920
5921        if (hw->mac.type != igc_i225)
5922                return -EOPNOTSUPP;
5923
5924        err = igc_save_qbv_schedule(adapter, qopt);
5925        if (err)
5926                return err;
5927
5928        return igc_tsn_offload_apply(adapter);
5929}
5930
5931static int igc_save_cbs_params(struct igc_adapter *adapter, int queue,
5932                               bool enable, int idleslope, int sendslope,
5933                               int hicredit, int locredit)
5934{
5935        bool cbs_status[IGC_MAX_SR_QUEUES] = { false };
5936        struct net_device *netdev = adapter->netdev;
5937        struct igc_ring *ring;
5938        int i;
5939
5940        /* i225 has two sets of credit-based shaper logic.
5941         * Supporting it only on the top two priority queues
5942         */
5943        if (queue < 0 || queue > 1)
5944                return -EINVAL;
5945
5946        ring = adapter->tx_ring[queue];
5947
5948        for (i = 0; i < IGC_MAX_SR_QUEUES; i++)
5949                if (adapter->tx_ring[i])
5950                        cbs_status[i] = adapter->tx_ring[i]->cbs_enable;
5951
5952        /* CBS should be enabled on the highest priority queue first in order
5953         * for the CBS algorithm to operate as intended.
5954         */
5955        if (enable) {
5956                if (queue == 1 && !cbs_status[0]) {
5957                        netdev_err(netdev,
5958                                   "Enabling CBS on queue1 before queue0\n");
5959                        return -EINVAL;
5960                }
5961        } else {
5962                if (queue == 0 && cbs_status[1]) {
5963                        netdev_err(netdev,
5964                                   "Disabling CBS on queue0 before queue1\n");
5965                        return -EINVAL;
5966                }
5967        }
5968
5969        ring->cbs_enable = enable;
5970        ring->idleslope = idleslope;
5971        ring->sendslope = sendslope;
5972        ring->hicredit = hicredit;
5973        ring->locredit = locredit;
5974
5975        return 0;
5976}
5977
5978static int igc_tsn_enable_cbs(struct igc_adapter *adapter,
5979                              struct tc_cbs_qopt_offload *qopt)
5980{
5981        struct igc_hw *hw = &adapter->hw;
5982        int err;
5983
5984        if (hw->mac.type != igc_i225)
5985                return -EOPNOTSUPP;
5986
5987        if (qopt->queue < 0 || qopt->queue > 1)
5988                return -EINVAL;
5989
5990        err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable,
5991                                  qopt->idleslope, qopt->sendslope,
5992                                  qopt->hicredit, qopt->locredit);
5993        if (err)
5994                return err;
5995
5996        return igc_tsn_offload_apply(adapter);
5997}
5998
5999static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
6000                        void *type_data)
6001{
6002        struct igc_adapter *adapter = netdev_priv(dev);
6003
6004        switch (type) {
6005        case TC_SETUP_QDISC_TAPRIO:
6006                return igc_tsn_enable_qbv_scheduling(adapter, type_data);
6007
6008        case TC_SETUP_QDISC_ETF:
6009                return igc_tsn_enable_launchtime(adapter, type_data);
6010
6011        case TC_SETUP_QDISC_CBS:
6012                return igc_tsn_enable_cbs(adapter, type_data);
6013
6014        default:
6015                return -EOPNOTSUPP;
6016        }
6017}
6018
6019static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
6020{
6021        struct igc_adapter *adapter = netdev_priv(dev);
6022
6023        switch (bpf->command) {
6024        case XDP_SETUP_PROG:
6025                return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
6026        case XDP_SETUP_XSK_POOL:
6027                return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
6028                                          bpf->xsk.queue_id);
6029        default:
6030                return -EOPNOTSUPP;
6031        }
6032}
6033
6034static int igc_xdp_xmit(struct net_device *dev, int num_frames,
6035                        struct xdp_frame **frames, u32 flags)
6036{
6037        struct igc_adapter *adapter = netdev_priv(dev);
6038        int cpu = smp_processor_id();
6039        struct netdev_queue *nq;
6040        struct igc_ring *ring;
6041        int i, drops;
6042
6043        if (unlikely(test_bit(__IGC_DOWN, &adapter->state)))
6044                return -ENETDOWN;
6045
6046        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
6047                return -EINVAL;
6048
6049        ring = igc_xdp_get_tx_ring(adapter, cpu);
6050        nq = txring_txq(ring);
6051
6052        __netif_tx_lock(nq, cpu);
6053
6054        drops = 0;
6055        for (i = 0; i < num_frames; i++) {
6056                int err;
6057                struct xdp_frame *xdpf = frames[i];
6058
6059                err = igc_xdp_init_tx_descriptor(ring, xdpf);
6060                if (err) {
6061                        xdp_return_frame_rx_napi(xdpf);
6062                        drops++;
6063                }
6064        }
6065
6066        if (flags & XDP_XMIT_FLUSH)
6067                igc_flush_tx_descriptors(ring);
6068
6069        __netif_tx_unlock(nq);
6070
6071        return num_frames - drops;
6072}
6073
6074static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
6075                                        struct igc_q_vector *q_vector)
6076{
6077        struct igc_hw *hw = &adapter->hw;
6078        u32 eics = 0;
6079
6080        eics |= q_vector->eims_value;
6081        wr32(IGC_EICS, eics);
6082}
6083
6084int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
6085{
6086        struct igc_adapter *adapter = netdev_priv(dev);
6087        struct igc_q_vector *q_vector;
6088        struct igc_ring *ring;
6089
6090        if (test_bit(__IGC_DOWN, &adapter->state))
6091                return -ENETDOWN;
6092
6093        if (!igc_xdp_is_enabled(adapter))
6094                return -ENXIO;
6095
6096        if (queue_id >= adapter->num_rx_queues)
6097                return -EINVAL;
6098
6099        ring = adapter->rx_ring[queue_id];
6100
6101        if (!ring->xsk_pool)
6102                return -ENXIO;
6103
6104        q_vector = adapter->q_vector[queue_id];
6105        if (!napi_if_scheduled_mark_missed(&q_vector->napi))
6106                igc_trigger_rxtxq_interrupt(adapter, q_vector);
6107
6108        return 0;
6109}
6110
6111static const struct net_device_ops igc_netdev_ops = {
6112        .ndo_open               = igc_open,
6113        .ndo_stop               = igc_close,
6114        .ndo_start_xmit         = igc_xmit_frame,
6115        .ndo_set_rx_mode        = igc_set_rx_mode,
6116        .ndo_set_mac_address    = igc_set_mac,
6117        .ndo_change_mtu         = igc_change_mtu,
6118        .ndo_get_stats64        = igc_get_stats64,
6119        .ndo_fix_features       = igc_fix_features,
6120        .ndo_set_features       = igc_set_features,
6121        .ndo_features_check     = igc_features_check,
6122        .ndo_eth_ioctl          = igc_ioctl,
6123        .ndo_setup_tc           = igc_setup_tc,
6124        .ndo_bpf                = igc_bpf,
6125        .ndo_xdp_xmit           = igc_xdp_xmit,
6126        .ndo_xsk_wakeup         = igc_xsk_wakeup,
6127};
6128
6129/* PCIe configuration access */
6130void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
6131{
6132        struct igc_adapter *adapter = hw->back;
6133
6134        pci_read_config_word(adapter->pdev, reg, value);
6135}
6136
6137void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
6138{
6139        struct igc_adapter *adapter = hw->back;
6140
6141        pci_write_config_word(adapter->pdev, reg, *value);
6142}
6143
6144s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
6145{
6146        struct igc_adapter *adapter = hw->back;
6147
6148        if (!pci_is_pcie(adapter->pdev))
6149                return -IGC_ERR_CONFIG;
6150
6151        pcie_capability_read_word(adapter->pdev, reg, value);
6152
6153        return IGC_SUCCESS;
6154}
6155
6156s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
6157{
6158        struct igc_adapter *adapter = hw->back;
6159
6160        if (!pci_is_pcie(adapter->pdev))
6161                return -IGC_ERR_CONFIG;
6162
6163        pcie_capability_write_word(adapter->pdev, reg, *value);
6164
6165        return IGC_SUCCESS;
6166}
6167
6168u32 igc_rd32(struct igc_hw *hw, u32 reg)
6169{
6170        struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
6171        u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
6172        u32 value = 0;
6173
6174        if (IGC_REMOVED(hw_addr))
6175                return ~value;
6176
6177        value = readl(&hw_addr[reg]);
6178
6179        /* reads should not return all F's */
6180        if (!(~value) && (!reg || !(~readl(hw_addr)))) {
6181                struct net_device *netdev = igc->netdev;
6182
6183                hw->hw_addr = NULL;
6184                netif_device_detach(netdev);
6185                netdev_err(netdev, "PCIe link lost, device now detached\n");
6186                WARN(pci_device_is_present(igc->pdev),
6187                     "igc: Failed to read reg 0x%x!\n", reg);
6188        }
6189
6190        return value;
6191}
6192
6193/**
6194 * igc_probe - Device Initialization Routine
6195 * @pdev: PCI device information struct
6196 * @ent: entry in igc_pci_tbl
6197 *
6198 * Returns 0 on success, negative on failure
6199 *
6200 * igc_probe initializes an adapter identified by a pci_dev structure.
6201 * The OS initialization, configuring the adapter private structure,
6202 * and a hardware reset occur.
6203 */
6204static int igc_probe(struct pci_dev *pdev,
6205                     const struct pci_device_id *ent)
6206{
6207        struct igc_adapter *adapter;
6208        struct net_device *netdev;
6209        struct igc_hw *hw;
6210        const struct igc_info *ei = igc_info_tbl[ent->driver_data];
6211        int err;
6212
6213        err = pci_enable_device_mem(pdev);
6214        if (err)
6215                return err;
6216
6217        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
6218        if (err) {
6219                dev_err(&pdev->dev,
6220                        "No usable DMA configuration, aborting\n");
6221                goto err_dma;
6222        }
6223
6224        err = pci_request_mem_regions(pdev, igc_driver_name);
6225        if (err)
6226                goto err_pci_reg;
6227
6228        pci_enable_pcie_error_reporting(pdev);
6229
6230        err = pci_enable_ptm(pdev, NULL);
6231        if (err < 0)
6232                dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");
6233
6234        pci_set_master(pdev);
6235
6236        err = -ENOMEM;
6237        netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
6238                                   IGC_MAX_TX_QUEUES);
6239
6240        if (!netdev)
6241                goto err_alloc_etherdev;
6242
6243        SET_NETDEV_DEV(netdev, &pdev->dev);
6244
6245        pci_set_drvdata(pdev, netdev);
6246        adapter = netdev_priv(netdev);
6247        adapter->netdev = netdev;
6248        adapter->pdev = pdev;
6249        hw = &adapter->hw;
6250        hw->back = adapter;
6251        adapter->port_num = hw->bus.func;
6252        adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
6253
6254        err = pci_save_state(pdev);
6255        if (err)
6256                goto err_ioremap;
6257
6258        err = -EIO;
6259        adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
6260                                   pci_resource_len(pdev, 0));
6261        if (!adapter->io_addr)
6262                goto err_ioremap;
6263
6264        /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
6265        hw->hw_addr = adapter->io_addr;
6266
6267        netdev->netdev_ops = &igc_netdev_ops;
6268        igc_ethtool_set_ops(netdev);
6269        netdev->watchdog_timeo = 5 * HZ;
6270
6271        netdev->mem_start = pci_resource_start(pdev, 0);
6272        netdev->mem_end = pci_resource_end(pdev, 0);
6273
6274        /* PCI config space info */
6275        hw->vendor_id = pdev->vendor;
6276        hw->device_id = pdev->device;
6277        hw->revision_id = pdev->revision;
6278        hw->subsystem_vendor_id = pdev->subsystem_vendor;
6279        hw->subsystem_device_id = pdev->subsystem_device;
6280
6281        /* Copy the default MAC and PHY function pointers */
6282        memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
6283        memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
6284
6285        /* Initialize skew-specific constants */
6286        err = ei->get_invariants(hw);
6287        if (err)
6288                goto err_sw_init;
6289
6290        /* Add supported features to the features list*/
6291        netdev->features |= NETIF_F_SG;
6292        netdev->features |= NETIF_F_TSO;
6293        netdev->features |= NETIF_F_TSO6;
6294        netdev->features |= NETIF_F_TSO_ECN;
6295        netdev->features |= NETIF_F_RXCSUM;
6296        netdev->features |= NETIF_F_HW_CSUM;
6297        netdev->features |= NETIF_F_SCTP_CRC;
6298        netdev->features |= NETIF_F_HW_TC;
6299
6300#define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
6301                                  NETIF_F_GSO_GRE_CSUM | \
6302                                  NETIF_F_GSO_IPXIP4 | \
6303                                  NETIF_F_GSO_IPXIP6 | \
6304                                  NETIF_F_GSO_UDP_TUNNEL | \
6305                                  NETIF_F_GSO_UDP_TUNNEL_CSUM)
6306
6307        netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES;
6308        netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES;
6309
6310        /* setup the private structure */
6311        err = igc_sw_init(adapter);
6312        if (err)
6313                goto err_sw_init;
6314
6315        /* copy netdev features into list of user selectable features */
6316        netdev->hw_features |= NETIF_F_NTUPLE;
6317        netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
6318        netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
6319        netdev->hw_features |= netdev->features;
6320
6321        netdev->features |= NETIF_F_HIGHDMA;
6322
6323        netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
6324        netdev->mpls_features |= NETIF_F_HW_CSUM;
6325        netdev->hw_enc_features |= netdev->vlan_features;
6326
6327        /* MTU range: 68 - 9216 */
6328        netdev->min_mtu = ETH_MIN_MTU;
6329        netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
6330
6331        /* before reading the NVM, reset the controller to put the device in a
6332         * known good starting state
6333         */
6334        hw->mac.ops.reset_hw(hw);
6335
6336        if (igc_get_flash_presence_i225(hw)) {
6337                if (hw->nvm.ops.validate(hw) < 0) {
6338                        dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
6339                        err = -EIO;
6340                        goto err_eeprom;
6341                }
6342        }
6343
6344        if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
6345                /* copy the MAC address out of the NVM */
6346                if (hw->mac.ops.read_mac_addr(hw))
6347                        dev_err(&pdev->dev, "NVM Read Error\n");
6348        }
6349
6350        eth_hw_addr_set(netdev, hw->mac.addr);
6351
6352        if (!is_valid_ether_addr(netdev->dev_addr)) {
6353                dev_err(&pdev->dev, "Invalid MAC Address\n");
6354                err = -EIO;
6355                goto err_eeprom;
6356        }
6357
6358        /* configure RXPBSIZE and TXPBSIZE */
6359        wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
6360        wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
6361
6362        timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
6363        timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
6364
6365        INIT_WORK(&adapter->reset_task, igc_reset_task);
6366        INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
6367
6368        /* Initialize link properties that are user-changeable */
6369        adapter->fc_autoneg = true;
6370        hw->mac.autoneg = true;
6371        hw->phy.autoneg_advertised = 0xaf;
6372
6373        hw->fc.requested_mode = igc_fc_default;
6374        hw->fc.current_mode = igc_fc_default;
6375
6376        /* By default, support wake on port A */
6377        adapter->flags |= IGC_FLAG_WOL_SUPPORTED;
6378
6379        /* initialize the wol settings based on the eeprom settings */
6380        if (adapter->flags & IGC_FLAG_WOL_SUPPORTED)
6381                adapter->wol |= IGC_WUFC_MAG;
6382
6383        device_set_wakeup_enable(&adapter->pdev->dev,
6384                                 adapter->flags & IGC_FLAG_WOL_SUPPORTED);
6385
6386        igc_ptp_init(adapter);
6387
6388        igc_tsn_clear_schedule(adapter);
6389
6390        /* reset the hardware with the new settings */
6391        igc_reset(adapter);
6392
6393        /* let the f/w know that the h/w is now under the control of the
6394         * driver.
6395         */
6396        igc_get_hw_control(adapter);
6397
6398        strncpy(netdev->name, "eth%d", IFNAMSIZ);
6399        err = register_netdev(netdev);
6400        if (err)
6401                goto err_register;
6402
6403         /* carrier off reporting is important to ethtool even BEFORE open */
6404        netif_carrier_off(netdev);
6405
6406        /* Check if Media Autosense is enabled */
6407        adapter->ei = *ei;
6408
6409        /* print pcie link status and MAC address */
6410        pcie_print_link_status(pdev);
6411        netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
6412
6413        dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
6414        /* Disable EEE for internal PHY devices */
6415        hw->dev_spec._base.eee_enable = false;
6416        adapter->flags &= ~IGC_FLAG_EEE;
6417        igc_set_eee_i225(hw, false, false, false);
6418
6419        pm_runtime_put_noidle(&pdev->dev);
6420
6421        return 0;
6422
6423err_register:
6424        igc_release_hw_control(adapter);
6425err_eeprom:
6426        if (!igc_check_reset_block(hw))
6427                igc_reset_phy(hw);
6428err_sw_init:
6429        igc_clear_interrupt_scheme(adapter);
6430        iounmap(adapter->io_addr);
6431err_ioremap:
6432        free_netdev(netdev);
6433err_alloc_etherdev:
6434        pci_disable_pcie_error_reporting(pdev);
6435        pci_release_mem_regions(pdev);
6436err_pci_reg:
6437err_dma:
6438        pci_disable_device(pdev);
6439        return err;
6440}
6441
6442/**
6443 * igc_remove - Device Removal Routine
6444 * @pdev: PCI device information struct
6445 *
6446 * igc_remove is called by the PCI subsystem to alert the driver
6447 * that it should release a PCI device.  This could be caused by a
6448 * Hot-Plug event, or because the driver is going to be removed from
6449 * memory.
6450 */
6451static void igc_remove(struct pci_dev *pdev)
6452{
6453        struct net_device *netdev = pci_get_drvdata(pdev);
6454        struct igc_adapter *adapter = netdev_priv(netdev);
6455
6456        pm_runtime_get_noresume(&pdev->dev);
6457
6458        igc_flush_nfc_rules(adapter);
6459
6460        igc_ptp_stop(adapter);
6461
6462        set_bit(__IGC_DOWN, &adapter->state);
6463
6464        del_timer_sync(&adapter->watchdog_timer);
6465        del_timer_sync(&adapter->phy_info_timer);
6466
6467        cancel_work_sync(&adapter->reset_task);
6468        cancel_work_sync(&adapter->watchdog_task);
6469
6470        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6471         * would have already happened in close and is redundant.
6472         */
6473        igc_release_hw_control(adapter);
6474        unregister_netdev(netdev);
6475
6476        igc_clear_interrupt_scheme(adapter);
6477        pci_iounmap(pdev, adapter->io_addr);
6478        pci_release_mem_regions(pdev);
6479
6480        free_netdev(netdev);
6481
6482        pci_disable_pcie_error_reporting(pdev);
6483
6484        pci_disable_device(pdev);
6485}
6486
6487static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake,
6488                          bool runtime)
6489{
6490        struct net_device *netdev = pci_get_drvdata(pdev);
6491        struct igc_adapter *adapter = netdev_priv(netdev);
6492        u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol;
6493        struct igc_hw *hw = &adapter->hw;
6494        u32 ctrl, rctl, status;
6495        bool wake;
6496
6497        rtnl_lock();
6498        netif_device_detach(netdev);
6499
6500        if (netif_running(netdev))
6501                __igc_close(netdev, true);
6502
6503        igc_ptp_suspend(adapter);
6504
6505        igc_clear_interrupt_scheme(adapter);
6506        rtnl_unlock();
6507
6508        status = rd32(IGC_STATUS);
6509        if (status & IGC_STATUS_LU)
6510                wufc &= ~IGC_WUFC_LNKC;
6511
6512        if (wufc) {
6513                igc_setup_rctl(adapter);
6514                igc_set_rx_mode(netdev);
6515
6516                /* turn on all-multi mode if wake on multicast is enabled */
6517                if (wufc & IGC_WUFC_MC) {
6518                        rctl = rd32(IGC_RCTL);
6519                        rctl |= IGC_RCTL_MPE;
6520                        wr32(IGC_RCTL, rctl);
6521                }
6522
6523                ctrl = rd32(IGC_CTRL);
6524                ctrl |= IGC_CTRL_ADVD3WUC;
6525                wr32(IGC_CTRL, ctrl);
6526
6527                /* Allow time for pending master requests to run */
6528                igc_disable_pcie_master(hw);
6529
6530                wr32(IGC_WUC, IGC_WUC_PME_EN);
6531                wr32(IGC_WUFC, wufc);
6532        } else {
6533                wr32(IGC_WUC, 0);
6534                wr32(IGC_WUFC, 0);
6535        }
6536
6537        wake = wufc || adapter->en_mng_pt;
6538        if (!wake)
6539                igc_power_down_phy_copper_base(&adapter->hw);
6540        else
6541                igc_power_up_link(adapter);
6542
6543        if (enable_wake)
6544                *enable_wake = wake;
6545
6546        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6547         * would have already happened in close and is redundant.
6548         */
6549        igc_release_hw_control(adapter);
6550
6551        pci_disable_device(pdev);
6552
6553        return 0;
6554}
6555
6556#ifdef CONFIG_PM
6557static int __maybe_unused igc_runtime_suspend(struct device *dev)
6558{
6559        return __igc_shutdown(to_pci_dev(dev), NULL, 1);
6560}
6561
6562static void igc_deliver_wake_packet(struct net_device *netdev)
6563{
6564        struct igc_adapter *adapter = netdev_priv(netdev);
6565        struct igc_hw *hw = &adapter->hw;
6566        struct sk_buff *skb;
6567        u32 wupl;
6568
6569        wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;
6570
6571        /* WUPM stores only the first 128 bytes of the wake packet.
6572         * Read the packet only if we have the whole thing.
6573         */
6574        if (wupl == 0 || wupl > IGC_WUPM_BYTES)
6575                return;
6576
6577        skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES);
6578        if (!skb)
6579                return;
6580
6581        skb_put(skb, wupl);
6582
6583        /* Ensure reads are 32-bit aligned */
6584        wupl = roundup(wupl, 4);
6585
6586        memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl);
6587
6588        skb->protocol = eth_type_trans(skb, netdev);
6589        netif_rx(skb);
6590}
6591
6592static int __maybe_unused igc_resume(struct device *dev)
6593{
6594        struct pci_dev *pdev = to_pci_dev(dev);
6595        struct net_device *netdev = pci_get_drvdata(pdev);
6596        struct igc_adapter *adapter = netdev_priv(netdev);
6597        struct igc_hw *hw = &adapter->hw;
6598        u32 err, val;
6599
6600        pci_set_power_state(pdev, PCI_D0);
6601        pci_restore_state(pdev);
6602        pci_save_state(pdev);
6603
6604        if (!pci_device_is_present(pdev))
6605                return -ENODEV;
6606        err = pci_enable_device_mem(pdev);
6607        if (err) {
6608                netdev_err(netdev, "Cannot enable PCI device from suspend\n");
6609                return err;
6610        }
6611        pci_set_master(pdev);
6612
6613        pci_enable_wake(pdev, PCI_D3hot, 0);
6614        pci_enable_wake(pdev, PCI_D3cold, 0);
6615
6616        if (igc_init_interrupt_scheme(adapter, true)) {
6617                netdev_err(netdev, "Unable to allocate memory for queues\n");
6618                return -ENOMEM;
6619        }
6620
6621        igc_reset(adapter);
6622
6623        /* let the f/w know that the h/w is now under the control of the
6624         * driver.
6625         */
6626        igc_get_hw_control(adapter);
6627
6628        val = rd32(IGC_WUS);
6629        if (val & WAKE_PKT_WUS)
6630                igc_deliver_wake_packet(netdev);
6631
6632        wr32(IGC_WUS, ~0);
6633
6634        rtnl_lock();
6635        if (!err && netif_running(netdev))
6636                err = __igc_open(netdev, true);
6637
6638        if (!err)
6639                netif_device_attach(netdev);
6640        rtnl_unlock();
6641
6642        return err;
6643}
6644
6645static int __maybe_unused igc_runtime_resume(struct device *dev)
6646{
6647        return igc_resume(dev);
6648}
6649
6650static int __maybe_unused igc_suspend(struct device *dev)
6651{
6652        return __igc_shutdown(to_pci_dev(dev), NULL, 0);
6653}
6654
6655static int __maybe_unused igc_runtime_idle(struct device *dev)
6656{
6657        struct net_device *netdev = dev_get_drvdata(dev);
6658        struct igc_adapter *adapter = netdev_priv(netdev);
6659
6660        if (!igc_has_link(adapter))
6661                pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6662
6663        return -EBUSY;
6664}
6665#endif /* CONFIG_PM */
6666
6667static void igc_shutdown(struct pci_dev *pdev)
6668{
6669        bool wake;
6670
6671        __igc_shutdown(pdev, &wake, 0);
6672
6673        if (system_state == SYSTEM_POWER_OFF) {
6674                pci_wake_from_d3(pdev, wake);
6675                pci_set_power_state(pdev, PCI_D3hot);
6676        }
6677}
6678
6679/**
6680 *  igc_io_error_detected - called when PCI error is detected
6681 *  @pdev: Pointer to PCI device
6682 *  @state: The current PCI connection state
6683 *
6684 *  This function is called after a PCI bus error affecting
6685 *  this device has been detected.
6686 **/
6687static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
6688                                              pci_channel_state_t state)
6689{
6690        struct net_device *netdev = pci_get_drvdata(pdev);
6691        struct igc_adapter *adapter = netdev_priv(netdev);
6692
6693        netif_device_detach(netdev);
6694
6695        if (state == pci_channel_io_perm_failure)
6696                return PCI_ERS_RESULT_DISCONNECT;
6697
6698        if (netif_running(netdev))
6699                igc_down(adapter);
6700        pci_disable_device(pdev);
6701
6702        /* Request a slot reset. */
6703        return PCI_ERS_RESULT_NEED_RESET;
6704}
6705
6706/**
6707 *  igc_io_slot_reset - called after the PCI bus has been reset.
6708 *  @pdev: Pointer to PCI device
6709 *
6710 *  Restart the card from scratch, as if from a cold-boot. Implementation
6711 *  resembles the first-half of the igc_resume routine.
6712 **/
6713static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
6714{
6715        struct net_device *netdev = pci_get_drvdata(pdev);
6716        struct igc_adapter *adapter = netdev_priv(netdev);
6717        struct igc_hw *hw = &adapter->hw;
6718        pci_ers_result_t result;
6719
6720        if (pci_enable_device_mem(pdev)) {
6721                netdev_err(netdev, "Could not re-enable PCI device after reset\n");
6722                result = PCI_ERS_RESULT_DISCONNECT;
6723        } else {
6724                pci_set_master(pdev);
6725                pci_restore_state(pdev);
6726                pci_save_state(pdev);
6727
6728                pci_enable_wake(pdev, PCI_D3hot, 0);
6729                pci_enable_wake(pdev, PCI_D3cold, 0);
6730
6731                /* In case of PCI error, adapter loses its HW address
6732                 * so we should re-assign it here.
6733                 */
6734                hw->hw_addr = adapter->io_addr;
6735
6736                igc_reset(adapter);
6737                wr32(IGC_WUS, ~0);
6738                result = PCI_ERS_RESULT_RECOVERED;
6739        }
6740
6741        return result;
6742}
6743
6744/**
6745 *  igc_io_resume - called when traffic can start to flow again.
6746 *  @pdev: Pointer to PCI device
6747 *
6748 *  This callback is called when the error recovery driver tells us that
6749 *  its OK to resume normal operation. Implementation resembles the
6750 *  second-half of the igc_resume routine.
6751 */
6752static void igc_io_resume(struct pci_dev *pdev)
6753{
6754        struct net_device *netdev = pci_get_drvdata(pdev);
6755        struct igc_adapter *adapter = netdev_priv(netdev);
6756
6757        rtnl_lock();
6758        if (netif_running(netdev)) {
6759                if (igc_open(netdev)) {
6760                        netdev_err(netdev, "igc_open failed after reset\n");
6761                        return;
6762                }
6763        }
6764
6765        netif_device_attach(netdev);
6766
6767        /* let the f/w know that the h/w is now under the control of the
6768         * driver.
6769         */
6770        igc_get_hw_control(adapter);
6771        rtnl_unlock();
6772}
6773
6774static const struct pci_error_handlers igc_err_handler = {
6775        .error_detected = igc_io_error_detected,
6776        .slot_reset = igc_io_slot_reset,
6777        .resume = igc_io_resume,
6778};
6779
6780#ifdef CONFIG_PM
6781static const struct dev_pm_ops igc_pm_ops = {
6782        SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
6783        SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
6784                           igc_runtime_idle)
6785};
6786#endif
6787
6788static struct pci_driver igc_driver = {
6789        .name     = igc_driver_name,
6790        .id_table = igc_pci_tbl,
6791        .probe    = igc_probe,
6792        .remove   = igc_remove,
6793#ifdef CONFIG_PM
6794        .driver.pm = &igc_pm_ops,
6795#endif
6796        .shutdown = igc_shutdown,
6797        .err_handler = &igc_err_handler,
6798};
6799
6800/**
6801 * igc_reinit_queues - return error
6802 * @adapter: pointer to adapter structure
6803 */
6804int igc_reinit_queues(struct igc_adapter *adapter)
6805{
6806        struct net_device *netdev = adapter->netdev;
6807        int err = 0;
6808
6809        if (netif_running(netdev))
6810                igc_close(netdev);
6811
6812        igc_reset_interrupt_capability(adapter);
6813
6814        if (igc_init_interrupt_scheme(adapter, true)) {
6815                netdev_err(netdev, "Unable to allocate memory for queues\n");
6816                return -ENOMEM;
6817        }
6818
6819        if (netif_running(netdev))
6820                err = igc_open(netdev);
6821
6822        return err;
6823}
6824
6825/**
6826 * igc_get_hw_dev - return device
6827 * @hw: pointer to hardware structure
6828 *
6829 * used by hardware layer to print debugging information
6830 */
6831struct net_device *igc_get_hw_dev(struct igc_hw *hw)
6832{
6833        struct igc_adapter *adapter = hw->back;
6834
6835        return adapter->netdev;
6836}
6837
6838static void igc_disable_rx_ring_hw(struct igc_ring *ring)
6839{
6840        struct igc_hw *hw = &ring->q_vector->adapter->hw;
6841        u8 idx = ring->reg_idx;
6842        u32 rxdctl;
6843
6844        rxdctl = rd32(IGC_RXDCTL(idx));
6845        rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
6846        rxdctl |= IGC_RXDCTL_SWFLUSH;
6847        wr32(IGC_RXDCTL(idx), rxdctl);
6848}
6849
6850void igc_disable_rx_ring(struct igc_ring *ring)
6851{
6852        igc_disable_rx_ring_hw(ring);
6853        igc_clean_rx_ring(ring);
6854}
6855
6856void igc_enable_rx_ring(struct igc_ring *ring)
6857{
6858        struct igc_adapter *adapter = ring->q_vector->adapter;
6859
6860        igc_configure_rx_ring(adapter, ring);
6861
6862        if (ring->xsk_pool)
6863                igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
6864        else
6865                igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
6866}
6867
6868static void igc_disable_tx_ring_hw(struct igc_ring *ring)
6869{
6870        struct igc_hw *hw = &ring->q_vector->adapter->hw;
6871        u8 idx = ring->reg_idx;
6872        u32 txdctl;
6873
6874        txdctl = rd32(IGC_TXDCTL(idx));
6875        txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
6876        txdctl |= IGC_TXDCTL_SWFLUSH;
6877        wr32(IGC_TXDCTL(idx), txdctl);
6878}
6879
6880void igc_disable_tx_ring(struct igc_ring *ring)
6881{
6882        igc_disable_tx_ring_hw(ring);
6883        igc_clean_tx_ring(ring);
6884}
6885
6886void igc_enable_tx_ring(struct igc_ring *ring)
6887{
6888        struct igc_adapter *adapter = ring->q_vector->adapter;
6889
6890        igc_configure_tx_ring(adapter, ring);
6891}
6892
6893/**
6894 * igc_init_module - Driver Registration Routine
6895 *
6896 * igc_init_module is the first routine called when the driver is
6897 * loaded. All it does is register with the PCI subsystem.
6898 */
6899static int __init igc_init_module(void)
6900{
6901        int ret;
6902
6903        pr_info("%s\n", igc_driver_string);
6904        pr_info("%s\n", igc_copyright);
6905
6906        ret = pci_register_driver(&igc_driver);
6907        return ret;
6908}
6909
6910module_init(igc_init_module);
6911
6912/**
6913 * igc_exit_module - Driver Exit Cleanup Routine
6914 *
6915 * igc_exit_module is called just before the driver is removed
6916 * from memory.
6917 */
6918static void __exit igc_exit_module(void)
6919{
6920        pci_unregister_driver(&igc_driver);
6921}
6922
6923module_exit(igc_exit_module);
6924/* igc_main.c */
6925