linux/drivers/net/ethernet/intel/ice/ice_main.c
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2018, Intel Corporation. */
   3
   4/* Intel(R) Ethernet Connection E800 Series Linux Driver */
   5
   6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   7
   8#include "ice.h"
   9#include "ice_lib.h"
  10
  11#define DRV_VERSION     "0.7.2-k"
  12#define DRV_SUMMARY     "Intel(R) Ethernet Connection E800 Series Linux Driver"
  13const char ice_drv_ver[] = DRV_VERSION;
  14static const char ice_driver_string[] = DRV_SUMMARY;
  15static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
  16
  17MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
  18MODULE_DESCRIPTION(DRV_SUMMARY);
  19MODULE_LICENSE("GPL v2");
  20MODULE_VERSION(DRV_VERSION);
  21
  22static int debug = -1;
  23module_param(debug, int, 0644);
  24#ifndef CONFIG_DYNAMIC_DEBUG
  25MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
  26#else
  27MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
  28#endif /* !CONFIG_DYNAMIC_DEBUG */
  29
  30static struct workqueue_struct *ice_wq;
  31static const struct net_device_ops ice_netdev_ops;
  32
  33static void ice_pf_dis_all_vsi(struct ice_pf *pf);
  34static void ice_rebuild(struct ice_pf *pf);
  35
  36static void ice_vsi_release_all(struct ice_pf *pf);
  37static void ice_update_vsi_stats(struct ice_vsi *vsi);
  38static void ice_update_pf_stats(struct ice_pf *pf);
  39
  40/**
  41 * ice_get_tx_pending - returns number of Tx descriptors not processed
  42 * @ring: the ring of descriptors
  43 */
  44static u32 ice_get_tx_pending(struct ice_ring *ring)
  45{
  46        u32 head, tail;
  47
  48        head = ring->next_to_clean;
  49        tail = readl(ring->tail);
  50
  51        if (head != tail)
  52                return (head < tail) ?
  53                        tail - head : (tail + ring->count - head);
  54        return 0;
  55}
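/* Editorial worked example (illustrative, not part of the driver): with a
 * ring of count = 256 descriptors, next_to_clean (head) = 250 and tail = 10,
 * the tail has wrapped, so pending = tail + count - head = 10 + 256 - 250 =
 * 16 descriptors are still outstanding. Without a wrap (head = 10,
 * tail = 250) the simple difference tail - head = 240 applies.
 */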
  56
  57/**
  58 * ice_check_for_hang_subtask - check for and recover hung queues
  59 * @pf: pointer to PF struct
  60 */
  61static void ice_check_for_hang_subtask(struct ice_pf *pf)
  62{
  63        struct ice_vsi *vsi = NULL;
  64        unsigned int i;
  65        u32 v, v_idx;
  66        int packets;
  67
  68        ice_for_each_vsi(pf, v)
  69                if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
  70                        vsi = pf->vsi[v];
  71                        break;
  72                }
  73
  74        if (!vsi || test_bit(__ICE_DOWN, vsi->state))
  75                return;
  76
  77        if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
  78                return;
  79
  80        for (i = 0; i < vsi->num_txq; i++) {
  81                struct ice_ring *tx_ring = vsi->tx_rings[i];
  82
  83                if (tx_ring && tx_ring->desc) {
  84                        int itr = ICE_ITR_NONE;
  85
  86                        /* If packet counter has not changed the queue is
  87                         * likely stalled, so force an interrupt for this
  88                         * queue.
  89                         *
  90                         * prev_pkt would be negative if there was no
  91                         * pending work.
  92                         */
  93                        packets = tx_ring->stats.pkts & INT_MAX;
  94                        if (tx_ring->tx_stats.prev_pkt == packets) {
  95                                /* Trigger sw interrupt to revive the queue */
  96                                v_idx = tx_ring->q_vector->v_idx;
  97                                wr32(&vsi->back->hw,
  98                                     GLINT_DYN_CTL(vsi->hw_base_vector + v_idx),
  99                                     (itr << GLINT_DYN_CTL_ITR_INDX_S) |
 100                                     GLINT_DYN_CTL_SWINT_TRIG_M |
 101                                     GLINT_DYN_CTL_INTENA_MSK_M);
 102                                continue;
 103                        }
 104
 105                        /* Memory barrier between read of packet count and call
 106                         * to ice_get_tx_pending()
 107                         */
 108                        smp_rmb();
 109                        tx_ring->tx_stats.prev_pkt =
 110                            ice_get_tx_pending(tx_ring) ? packets : -1;
 111                }
 112        }
 113}
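/* Note on the sentinel logic above: packets is masked with INT_MAX and can
 * therefore never be negative, so storing -1 in prev_pkt whenever
 * ice_get_tx_pending() reports no outstanding work guarantees that an idle
 * queue can never match the counter on the next pass and be mistaken for a
 * stalled one.
 */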
 114
 115/**
  116 * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
  117 * @netdev: the net device on which the sync is happening
  118 * @addr: MAC address to sync
  119 *
  120 * This is a callback function which is called by the in-kernel device sync
  121 * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
  122 * populates the tmp_sync_list, which is later used by ice_add_mac to add
  123 * the MAC filters to the hardware.
 124 */
 125static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
 126{
 127        struct ice_netdev_priv *np = netdev_priv(netdev);
 128        struct ice_vsi *vsi = np->vsi;
 129
 130        if (ice_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr))
 131                return -EINVAL;
 132
 133        return 0;
 134}
 135
 136/**
  137 * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
  138 * @netdev: the net device on which the unsync is happening
  139 * @addr: MAC address to unsync
  140 *
  141 * This is a callback function which is called by the in-kernel device unsync
  142 * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
  143 * populates the tmp_unsync_list, which is later used by ice_remove_mac to
  144 * delete the MAC filters from the hardware.
 145 */
 146static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
 147{
 148        struct ice_netdev_priv *np = netdev_priv(netdev);
 149        struct ice_vsi *vsi = np->vsi;
 150
 151        if (ice_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr))
 152                return -EINVAL;
 153
 154        return 0;
 155}
 156
 157/**
 158 * ice_vsi_fltr_changed - check if filter state changed
 159 * @vsi: VSI to be checked
 160 *
 161 * returns true if filter state has changed, false otherwise.
 162 */
 163static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
 164{
 165        return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) ||
 166               test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) ||
 167               test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
 168}
 169
 170/**
 171 * ice_vsi_sync_fltr - Update the VSI filter list to the HW
 172 * @vsi: ptr to the VSI
 173 *
 174 * Push any outstanding VSI filter changes through the AdminQ.
 175 */
 176static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 177{
 178        struct device *dev = &vsi->back->pdev->dev;
 179        struct net_device *netdev = vsi->netdev;
 180        bool promisc_forced_on = false;
 181        struct ice_pf *pf = vsi->back;
 182        struct ice_hw *hw = &pf->hw;
 183        enum ice_status status = 0;
 184        u32 changed_flags = 0;
 185        int err = 0;
 186
 187        if (!vsi->netdev)
 188                return -EINVAL;
 189
 190        while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state))
 191                usleep_range(1000, 2000);
 192
 193        changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
 194        vsi->current_netdev_flags = vsi->netdev->flags;
 195
 196        INIT_LIST_HEAD(&vsi->tmp_sync_list);
 197        INIT_LIST_HEAD(&vsi->tmp_unsync_list);
 198
 199        if (ice_vsi_fltr_changed(vsi)) {
 200                clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
 201                clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
 202                clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
 203
 204                /* grab the netdev's addr_list_lock */
 205                netif_addr_lock_bh(netdev);
 206                __dev_uc_sync(netdev, ice_add_mac_to_sync_list,
 207                              ice_add_mac_to_unsync_list);
 208                __dev_mc_sync(netdev, ice_add_mac_to_sync_list,
 209                              ice_add_mac_to_unsync_list);
 210                /* our temp lists are populated. release lock */
 211                netif_addr_unlock_bh(netdev);
 212        }
 213
 214        /* Remove mac addresses in the unsync list */
 215        status = ice_remove_mac(hw, &vsi->tmp_unsync_list);
 216        ice_free_fltr_list(dev, &vsi->tmp_unsync_list);
 217        if (status) {
 218                netdev_err(netdev, "Failed to delete MAC filters\n");
 219                /* if we failed because of alloc failures, just bail */
 220                if (status == ICE_ERR_NO_MEMORY) {
 221                        err = -ENOMEM;
 222                        goto out;
 223                }
 224        }
 225
 226        /* Add mac addresses in the sync list */
 227        status = ice_add_mac(hw, &vsi->tmp_sync_list);
 228        ice_free_fltr_list(dev, &vsi->tmp_sync_list);
 229        if (status) {
 230                netdev_err(netdev, "Failed to add MAC filters\n");
  231                /* If there is no more space for new umac filters, the VSI
  232                 * should go into promiscuous mode. There should be some
 233                 * space reserved for promiscuous filters.
 234                 */
 235                if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
 236                    !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC,
 237                                      vsi->state)) {
 238                        promisc_forced_on = true;
 239                        netdev_warn(netdev,
 240                                    "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
 241                                    vsi->vsi_num);
 242                } else {
 243                        err = -EIO;
 244                        goto out;
 245                }
 246        }
 247        /* check for changes in promiscuous modes */
 248        if (changed_flags & IFF_ALLMULTI)
 249                netdev_warn(netdev, "Unsupported configuration\n");
 250
 251        if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
 252            test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) {
 253                clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
 254                if (vsi->current_netdev_flags & IFF_PROMISC) {
 255                        /* Apply TX filter rule to get traffic from VMs */
 256                        status = ice_cfg_dflt_vsi(hw, vsi->idx, true,
 257                                                  ICE_FLTR_TX);
 258                        if (status) {
 259                                netdev_err(netdev, "Error setting default VSI %i tx rule\n",
 260                                           vsi->vsi_num);
 261                                vsi->current_netdev_flags &= ~IFF_PROMISC;
 262                                err = -EIO;
 263                                goto out_promisc;
 264                        }
 265                        /* Apply RX filter rule to get traffic from wire */
 266                        status = ice_cfg_dflt_vsi(hw, vsi->idx, true,
 267                                                  ICE_FLTR_RX);
 268                        if (status) {
 269                                netdev_err(netdev, "Error setting default VSI %i rx rule\n",
 270                                           vsi->vsi_num);
 271                                vsi->current_netdev_flags &= ~IFF_PROMISC;
 272                                err = -EIO;
 273                                goto out_promisc;
 274                        }
 275                } else {
 276                        /* Clear TX filter rule to stop traffic from VMs */
 277                        status = ice_cfg_dflt_vsi(hw, vsi->idx, false,
 278                                                  ICE_FLTR_TX);
 279                        if (status) {
 280                                netdev_err(netdev, "Error clearing default VSI %i tx rule\n",
 281                                           vsi->vsi_num);
 282                                vsi->current_netdev_flags |= IFF_PROMISC;
 283                                err = -EIO;
 284                                goto out_promisc;
 285                        }
 286                        /* Clear RX filter to remove traffic from wire */
 287                        status = ice_cfg_dflt_vsi(hw, vsi->idx, false,
 288                                                  ICE_FLTR_RX);
 289                        if (status) {
 290                                netdev_err(netdev, "Error clearing default VSI %i rx rule\n",
 291                                           vsi->vsi_num);
 292                                vsi->current_netdev_flags |= IFF_PROMISC;
 293                                err = -EIO;
 294                                goto out_promisc;
 295                        }
 296                }
 297        }
 298        goto exit;
 299
 300out_promisc:
 301        set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
 302        goto exit;
 303out:
 304        /* if something went wrong then set the changed flag so we try again */
 305        set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
 306        set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
 307exit:
 308        clear_bit(__ICE_CFG_BUSY, vsi->state);
 309        return err;
 310}
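/* Error-path summary for ice_vsi_sync_fltr() above: the "out" label re-arms
 * both ICE_VSI_FLAG_UMAC_FLTR_CHANGED and ICE_VSI_FLAG_MMAC_FLTR_CHANGED so
 * the next service-task pass retries the whole MAC sync, while "out_promisc"
 * only re-arms ICE_VSI_FLAG_PROMISC_CHANGED so just the promiscuous
 * reconfiguration is retried.
 */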
 311
 312/**
 313 * ice_sync_fltr_subtask - Sync the VSI filter list with HW
 314 * @pf: board private structure
 315 */
 316static void ice_sync_fltr_subtask(struct ice_pf *pf)
 317{
 318        int v;
 319
 320        if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
 321                return;
 322
 323        clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
 324
 325        for (v = 0; v < pf->num_alloc_vsi; v++)
 326                if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
 327                    ice_vsi_sync_fltr(pf->vsi[v])) {
 328                        /* come back and try again later */
 329                        set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
 330                        break;
 331                }
 332}
 333
 334/**
 335 * ice_prepare_for_reset - prep for the core to reset
 336 * @pf: board private structure
 337 *
 338 * Inform or close all dependent features in prep for reset.
 339 */
 340static void
 341ice_prepare_for_reset(struct ice_pf *pf)
 342{
 343        struct ice_hw *hw = &pf->hw;
 344
 345        /* Notify VFs of impending reset */
 346        if (ice_check_sq_alive(hw, &hw->mailboxq))
 347                ice_vc_notify_reset(pf);
 348
 349        /* disable the VSIs and their queues that are not already DOWN */
 350        ice_pf_dis_all_vsi(pf);
 351
 352        if (hw->port_info)
 353                ice_sched_clear_port(hw->port_info);
 354
 355        ice_shutdown_all_ctrlq(hw);
 356
 357        set_bit(__ICE_PREPARED_FOR_RESET, pf->state);
 358}
 359
 360/**
 361 * ice_do_reset - Initiate one of many types of resets
 362 * @pf: board private structure
  363 * @reset_type: the reset type requested, recorded in pf->state before
  364 * this function was called
 365 */
 366static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
 367{
 368        struct device *dev = &pf->pdev->dev;
 369        struct ice_hw *hw = &pf->hw;
 370
 371        dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
 372        WARN_ON(in_interrupt());
 373
 374        ice_prepare_for_reset(pf);
 375
 376        /* trigger the reset */
 377        if (ice_reset(hw, reset_type)) {
 378                dev_err(dev, "reset %d failed\n", reset_type);
 379                set_bit(__ICE_RESET_FAILED, pf->state);
 380                clear_bit(__ICE_RESET_OICR_RECV, pf->state);
 381                clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
 382                clear_bit(__ICE_PFR_REQ, pf->state);
 383                clear_bit(__ICE_CORER_REQ, pf->state);
 384                clear_bit(__ICE_GLOBR_REQ, pf->state);
 385                return;
 386        }
 387
 388        /* PFR is a bit of a special case because it doesn't result in an OICR
 389         * interrupt. So for PFR, rebuild after the reset and clear the reset-
 390         * associated state bits.
 391         */
 392        if (reset_type == ICE_RESET_PFR) {
 393                pf->pfr_count++;
 394                ice_rebuild(pf);
 395                clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
 396                clear_bit(__ICE_PFR_REQ, pf->state);
 397        }
 398}
 399
 400/**
 401 * ice_reset_subtask - Set up for resetting the device and driver
 402 * @pf: board private structure
 403 */
 404static void ice_reset_subtask(struct ice_pf *pf)
 405{
 406        enum ice_reset_req reset_type = ICE_RESET_INVAL;
 407
  408        /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
  409         * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
  410         * of reset is pending and sets __ICE_RESET_OICR_RECV along with a bit
  411         * indicating the reset type in pf->state. If __ICE_RESET_OICR_RECV is
  412         * set, prepare for the pending reset unless that has already happened:
  413         * for global resets this PF initiated in software, preparation is done
  414         * beforehand (as indicated by __ICE_PREPARED_FOR_RESET); for resets
  415         * initiated by firmware or by other PFs, that bit is not set, so
  416         * prepare now. Then poll for reset completion, rebuild and return.
  417         */
 418        if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) {
 419                clear_bit(__ICE_GLOBR_RECV, pf->state);
 420                clear_bit(__ICE_CORER_RECV, pf->state);
 421                if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state))
 422                        ice_prepare_for_reset(pf);
 423
 424                /* make sure we are ready to rebuild */
 425                if (ice_check_reset(&pf->hw)) {
 426                        set_bit(__ICE_RESET_FAILED, pf->state);
 427                } else {
 428                        /* done with reset. start rebuild */
 429                        pf->hw.reset_ongoing = false;
 430                        ice_rebuild(pf);
  431                        /* clear bits to resume normal operations; note
  432                         * __ICE_NEEDS_RESTART is set in case the rebuild failed
 433                         */
 434                        clear_bit(__ICE_RESET_OICR_RECV, pf->state);
 435                        clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
 436                        clear_bit(__ICE_PFR_REQ, pf->state);
 437                        clear_bit(__ICE_CORER_REQ, pf->state);
 438                        clear_bit(__ICE_GLOBR_REQ, pf->state);
 439                }
 440
 441                return;
 442        }
 443
 444        /* No pending resets to finish processing. Check for new resets */
 445        if (test_bit(__ICE_PFR_REQ, pf->state))
 446                reset_type = ICE_RESET_PFR;
 447        if (test_bit(__ICE_CORER_REQ, pf->state))
 448                reset_type = ICE_RESET_CORER;
 449        if (test_bit(__ICE_GLOBR_REQ, pf->state))
 450                reset_type = ICE_RESET_GLOBR;
 451        /* If no valid reset type requested just return */
 452        if (reset_type == ICE_RESET_INVAL)
 453                return;
 454
 455        /* reset if not already down or busy */
 456        if (!test_bit(__ICE_DOWN, pf->state) &&
 457            !test_bit(__ICE_CFG_BUSY, pf->state)) {
 458                ice_do_reset(pf, reset_type);
 459        }
 460}
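/* Precedence note for the request checks above: the tests assign reset_type
 * in order, so when several request bits are set the last assignment wins.
 * PFR is therefore overridden by CORER, which is in turn overridden by
 * GLOBR, giving the most disruptive requested reset priority.
 */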
 461
 462/**
 463 * ice_print_link_msg - print link up or down message
 464 * @vsi: the VSI whose link status is being queried
 465 * @isup: boolean for if the link is now up or down
 466 */
 467void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 468{
 469        const char *speed;
 470        const char *fc;
 471
 472        if (vsi->current_isup == isup)
 473                return;
 474
 475        vsi->current_isup = isup;
 476
 477        if (!isup) {
 478                netdev_info(vsi->netdev, "NIC Link is Down\n");
 479                return;
 480        }
 481
 482        switch (vsi->port_info->phy.link_info.link_speed) {
 483        case ICE_AQ_LINK_SPEED_40GB:
 484                speed = "40 G";
 485                break;
 486        case ICE_AQ_LINK_SPEED_25GB:
 487                speed = "25 G";
 488                break;
 489        case ICE_AQ_LINK_SPEED_20GB:
 490                speed = "20 G";
 491                break;
 492        case ICE_AQ_LINK_SPEED_10GB:
 493                speed = "10 G";
 494                break;
 495        case ICE_AQ_LINK_SPEED_5GB:
 496                speed = "5 G";
 497                break;
 498        case ICE_AQ_LINK_SPEED_2500MB:
 499                speed = "2.5 G";
 500                break;
 501        case ICE_AQ_LINK_SPEED_1000MB:
 502                speed = "1 G";
 503                break;
 504        case ICE_AQ_LINK_SPEED_100MB:
 505                speed = "100 M";
 506                break;
 507        default:
 508                speed = "Unknown";
 509                break;
 510        }
 511
 512        switch (vsi->port_info->fc.current_mode) {
 513        case ICE_FC_FULL:
 514                fc = "RX/TX";
 515                break;
 516        case ICE_FC_TX_PAUSE:
 517                fc = "TX";
 518                break;
 519        case ICE_FC_RX_PAUSE:
 520                fc = "RX";
 521                break;
 522        default:
 523                fc = "Unknown";
 524                break;
 525        }
 526
 527        netdev_info(vsi->netdev, "NIC Link is up %sbps, Flow Control: %s\n",
 528                    speed, fc);
 529}
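/* Example of the resulting log line (hypothetical link state): for a 25G
 * link with full flow control the message reads "NIC Link is up 25 Gbps,
 * Flow Control: RX/TX". The speed strings above already end in " G" or
 * " M", so the format string only appends "bps".
 */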
 530
 531/**
  532 * ice_vsi_link_event - update the VSI's netdev
  533 * @vsi: the VSI on which the link event occurred
  534 * @link_up: whether the netdev carrier should be brought up or down
 535 */
 536static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
 537{
 538        if (!vsi || test_bit(__ICE_DOWN, vsi->state))
 539                return;
 540
 541        if (vsi->type == ICE_VSI_PF) {
 542                if (!vsi->netdev) {
 543                        dev_dbg(&vsi->back->pdev->dev,
 544                                "vsi->netdev is not initialized!\n");
 545                        return;
 546                }
 547                if (link_up) {
 548                        netif_carrier_on(vsi->netdev);
 549                        netif_tx_wake_all_queues(vsi->netdev);
 550                } else {
 551                        netif_carrier_off(vsi->netdev);
 552                        netif_tx_stop_all_queues(vsi->netdev);
 553                }
 554        }
 555}
 556
 557/**
 558 * ice_link_event - process the link event
 559 * @pf: pf that the link event is associated with
 560 * @pi: port_info for the port that the link event is associated with
 561 *
 562 * Returns -EIO if ice_get_link_status() fails
 563 * Returns 0 on success
 564 */
 565static int
 566ice_link_event(struct ice_pf *pf, struct ice_port_info *pi)
 567{
 568        u8 new_link_speed, old_link_speed;
 569        struct ice_phy_info *phy_info;
 570        bool new_link_same_as_old;
 571        bool new_link, old_link;
 572        u8 lport;
 573        u16 v;
 574
 575        phy_info = &pi->phy;
 576        phy_info->link_info_old = phy_info->link_info;
 577        /* Force ice_get_link_status() to update link info */
 578        phy_info->get_link_info = true;
 579
 580        old_link = (phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
 581        old_link_speed = phy_info->link_info_old.link_speed;
 582
 583        lport = pi->lport;
 584        if (ice_get_link_status(pi, &new_link)) {
 585                dev_dbg(&pf->pdev->dev,
 586                        "Could not get link status for port %d\n", lport);
 587                return -EIO;
 588        }
 589
 590        new_link_speed = phy_info->link_info.link_speed;
 591
 592        new_link_same_as_old = (new_link == old_link &&
 593                                new_link_speed == old_link_speed);
 594
 595        ice_for_each_vsi(pf, v) {
 596                struct ice_vsi *vsi = pf->vsi[v];
 597
 598                if (!vsi || !vsi->port_info)
 599                        continue;
 600
  601                if (new_link_same_as_old &&
  602                    (test_bit(__ICE_DOWN, vsi->state) ||
  603                    (vsi->netdev && new_link == netif_carrier_ok(vsi->netdev))))
  604                        continue;
 605
 606                if (vsi->port_info->lport == lport) {
 607                        ice_print_link_msg(vsi, new_link);
 608                        ice_vsi_link_event(vsi, new_link);
 609                }
 610        }
 611
 612        if (!new_link_same_as_old && pf->num_alloc_vfs)
 613                ice_vc_notify_link_state(pf);
 614
 615        return 0;
 616}
 617
 618/**
 619 * ice_watchdog_subtask - periodic tasks not using event driven scheduling
 620 * @pf: board private structure
 621 */
 622static void ice_watchdog_subtask(struct ice_pf *pf)
 623{
 624        int i;
 625
 626        /* if interface is down do nothing */
 627        if (test_bit(__ICE_DOWN, pf->state) ||
 628            test_bit(__ICE_CFG_BUSY, pf->state))
 629                return;
 630
 631        /* make sure we don't do these things too often */
 632        if (time_before(jiffies,
 633                        pf->serv_tmr_prev + pf->serv_tmr_period))
 634                return;
 635
 636        pf->serv_tmr_prev = jiffies;
 637
 638        if (ice_link_event(pf, pf->hw.port_info))
 639                dev_dbg(&pf->pdev->dev, "ice_link_event failed\n");
 640
 641        /* Update the stats for active netdevs so the network stack
 642         * can look at updated numbers whenever it cares to
 643         */
 644        ice_update_pf_stats(pf);
 645        for (i = 0; i < pf->num_alloc_vsi; i++)
 646                if (pf->vsi[i] && pf->vsi[i]->netdev)
 647                        ice_update_vsi_stats(pf->vsi[i]);
 648}
 649
 650/**
 651 * __ice_clean_ctrlq - helper function to clean controlq rings
 652 * @pf: ptr to struct ice_pf
 653 * @q_type: specific Control queue type
 654 */
 655static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 656{
 657        struct ice_rq_event_info event;
 658        struct ice_hw *hw = &pf->hw;
 659        struct ice_ctl_q_info *cq;
 660        u16 pending, i = 0;
 661        const char *qtype;
 662        u32 oldval, val;
 663
 664        /* Do not clean control queue if/when PF reset fails */
 665        if (test_bit(__ICE_RESET_FAILED, pf->state))
 666                return 0;
 667
 668        switch (q_type) {
 669        case ICE_CTL_Q_ADMIN:
 670                cq = &hw->adminq;
 671                qtype = "Admin";
 672                break;
 673        case ICE_CTL_Q_MAILBOX:
 674                cq = &hw->mailboxq;
 675                qtype = "Mailbox";
 676                break;
 677        default:
 678                dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n",
 679                         q_type);
 680                return 0;
 681        }
 682
  683        /* check for error indications - the PF_xx_AxQLEN register layouts for
  684         * FW/MBX/SB are identical, so just use the PF_FW_AxQLEN defines.
 685         */
 686        val = rd32(hw, cq->rq.len);
 687        if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
 688                   PF_FW_ARQLEN_ARQCRIT_M)) {
 689                oldval = val;
 690                if (val & PF_FW_ARQLEN_ARQVFE_M)
 691                        dev_dbg(&pf->pdev->dev,
 692                                "%s Receive Queue VF Error detected\n", qtype);
 693                if (val & PF_FW_ARQLEN_ARQOVFL_M) {
 694                        dev_dbg(&pf->pdev->dev,
 695                                "%s Receive Queue Overflow Error detected\n",
 696                                qtype);
 697                }
 698                if (val & PF_FW_ARQLEN_ARQCRIT_M)
 699                        dev_dbg(&pf->pdev->dev,
 700                                "%s Receive Queue Critical Error detected\n",
 701                                qtype);
 702                val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
 703                         PF_FW_ARQLEN_ARQCRIT_M);
 704                if (oldval != val)
 705                        wr32(hw, cq->rq.len, val);
 706        }
 707
 708        val = rd32(hw, cq->sq.len);
 709        if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
 710                   PF_FW_ATQLEN_ATQCRIT_M)) {
 711                oldval = val;
 712                if (val & PF_FW_ATQLEN_ATQVFE_M)
 713                        dev_dbg(&pf->pdev->dev,
 714                                "%s Send Queue VF Error detected\n", qtype);
 715                if (val & PF_FW_ATQLEN_ATQOVFL_M) {
 716                        dev_dbg(&pf->pdev->dev,
 717                                "%s Send Queue Overflow Error detected\n",
 718                                qtype);
 719                }
 720                if (val & PF_FW_ATQLEN_ATQCRIT_M)
 721                        dev_dbg(&pf->pdev->dev,
 722                                "%s Send Queue Critical Error detected\n",
 723                                qtype);
 724                val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
 725                         PF_FW_ATQLEN_ATQCRIT_M);
 726                if (oldval != val)
 727                        wr32(hw, cq->sq.len, val);
 728        }
 729
 730        event.buf_len = cq->rq_buf_size;
 731        event.msg_buf = devm_kzalloc(&pf->pdev->dev, event.buf_len,
 732                                     GFP_KERNEL);
 733        if (!event.msg_buf)
 734                return 0;
 735
 736        do {
 737                enum ice_status ret;
 738                u16 opcode;
 739
 740                ret = ice_clean_rq_elem(hw, cq, &event, &pending);
 741                if (ret == ICE_ERR_AQ_NO_WORK)
 742                        break;
 743                if (ret) {
 744                        dev_err(&pf->pdev->dev,
 745                                "%s Receive Queue event error %d\n", qtype,
 746                                ret);
 747                        break;
 748                }
 749
 750                opcode = le16_to_cpu(event.desc.opcode);
 751
 752                switch (opcode) {
 753                case ice_mbx_opc_send_msg_to_pf:
 754                        ice_vc_process_vf_msg(pf, &event);
 755                        break;
 756                case ice_aqc_opc_fw_logging:
 757                        ice_output_fw_log(hw, &event.desc, event.msg_buf);
 758                        break;
 759                default:
 760                        dev_dbg(&pf->pdev->dev,
 761                                "%s Receive Queue unknown event 0x%04x ignored\n",
 762                                qtype, opcode);
 763                        break;
 764                }
 765        } while (pending && (i++ < ICE_DFLT_IRQ_WORK));
 766
 767        devm_kfree(&pf->pdev->dev, event.msg_buf);
 768
 769        return pending && (i == ICE_DFLT_IRQ_WORK);
 770}
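/* Return-value note for the loop above: the function returns true only when
 * messages remain pending after the full work budget was consumed
 * (i == ICE_DFLT_IRQ_WORK). The callers below use that to return early
 * without clearing their EVENT_PENDING state bit, so another cleaning pass
 * gets scheduled.
 */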
 771
 772/**
 773 * ice_ctrlq_pending - check if there is a difference between ntc and ntu
 774 * @hw: pointer to hardware info
 775 * @cq: control queue information
 776 *
 777 * returns true if there are pending messages in a queue, false if there aren't
 778 */
 779static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
 780{
 781        u16 ntu;
 782
 783        ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
 784        return cq->rq.next_to_clean != ntu;
 785}
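/* Worked example (hypothetical register values): if rd32() returns 0x12A
 * for the head register and head_mask is 0x3FF, then ntu = 0x12A. With
 * next_to_clean = 0x128, descriptors 0x128 and 0x129 are still to be
 * cleaned, so the function reports pending work; once next_to_clean catches
 * up to ntu the queue is empty.
 */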
 786
 787/**
 788 * ice_clean_adminq_subtask - clean the AdminQ rings
 789 * @pf: board private structure
 790 */
 791static void ice_clean_adminq_subtask(struct ice_pf *pf)
 792{
 793        struct ice_hw *hw = &pf->hw;
 794
 795        if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
 796                return;
 797
 798        if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
 799                return;
 800
 801        clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
 802
 803        /* There might be a situation where new messages arrive to a control
 804         * queue between processing the last message and clearing the
 805         * EVENT_PENDING bit. So before exiting, check queue head again (using
 806         * ice_ctrlq_pending) and process new messages if any.
 807         */
 808        if (ice_ctrlq_pending(hw, &hw->adminq))
 809                __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);
 810
 811        ice_flush(hw);
 812}
 813
 814/**
 815 * ice_clean_mailboxq_subtask - clean the MailboxQ rings
 816 * @pf: board private structure
 817 */
 818static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
 819{
 820        struct ice_hw *hw = &pf->hw;
 821
 822        if (!test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state))
 823                return;
 824
 825        if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
 826                return;
 827
 828        clear_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state);
 829
 830        if (ice_ctrlq_pending(hw, &hw->mailboxq))
 831                __ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);
 832
 833        ice_flush(hw);
 834}
 835
 836/**
 837 * ice_service_task_schedule - schedule the service task to wake up
 838 * @pf: board private structure
 839 *
 840 * If not already scheduled, this puts the task into the work queue.
 841 */
 842static void ice_service_task_schedule(struct ice_pf *pf)
 843{
 844        if (!test_bit(__ICE_SERVICE_DIS, pf->state) &&
 845            !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state) &&
 846            !test_bit(__ICE_NEEDS_RESTART, pf->state))
 847                queue_work(ice_wq, &pf->serv_task);
 848}
 849
 850/**
 851 * ice_service_task_complete - finish up the service task
 852 * @pf: board private structure
 853 */
 854static void ice_service_task_complete(struct ice_pf *pf)
 855{
 856        WARN_ON(!test_bit(__ICE_SERVICE_SCHED, pf->state));
 857
 858        /* force memory (pf->state) to sync before next service task */
 859        smp_mb__before_atomic();
 860        clear_bit(__ICE_SERVICE_SCHED, pf->state);
 861}
 862
 863/**
 864 * ice_service_task_stop - stop service task and cancel works
 865 * @pf: board private structure
 866 */
 867static void ice_service_task_stop(struct ice_pf *pf)
 868{
 869        set_bit(__ICE_SERVICE_DIS, pf->state);
 870
 871        if (pf->serv_tmr.function)
 872                del_timer_sync(&pf->serv_tmr);
 873        if (pf->serv_task.func)
 874                cancel_work_sync(&pf->serv_task);
 875
 876        clear_bit(__ICE_SERVICE_SCHED, pf->state);
 877}
 878
 879/**
 880 * ice_service_task_restart - restart service task and schedule works
 881 * @pf: board private structure
 882 *
  883 * This function is needed for suspend and resume flows (e.g. the WoL scenario)
 884 */
 885static void ice_service_task_restart(struct ice_pf *pf)
 886{
 887        clear_bit(__ICE_SERVICE_DIS, pf->state);
 888        ice_service_task_schedule(pf);
 889}
 890
 891/**
 892 * ice_service_timer - timer callback to schedule service task
 893 * @t: pointer to timer_list
 894 */
 895static void ice_service_timer(struct timer_list *t)
 896{
 897        struct ice_pf *pf = from_timer(pf, t, serv_tmr);
 898
 899        mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
 900        ice_service_task_schedule(pf);
 901}
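/* Timing sketch (assuming serv_tmr_period is on the order of HZ, i.e. about
 * one second; the period is configured at probe time, outside this excerpt):
 * each expiry re-arms the timer, with round_jiffies() rounding the expiry to
 * reduce scattered wakeups, and merely schedules the service task instead of
 * doing any work in timer context.
 */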
 902
 903/**
 904 * ice_handle_mdd_event - handle malicious driver detect event
 905 * @pf: pointer to the PF structure
 906 *
  907 * Called from the service task. The OICR interrupt handler indicates an MDD event.
 908 */
 909static void ice_handle_mdd_event(struct ice_pf *pf)
 910{
 911        struct ice_hw *hw = &pf->hw;
 912        bool mdd_detected = false;
 913        u32 reg;
 914        int i;
 915
 916        if (!test_bit(__ICE_MDD_EVENT_PENDING, pf->state))
 917                return;
 918
 919        /* find what triggered the MDD event */
 920        reg = rd32(hw, GL_MDET_TX_PQM);
 921        if (reg & GL_MDET_TX_PQM_VALID_M) {
 922                u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >>
 923                                GL_MDET_TX_PQM_PF_NUM_S;
 924                u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >>
 925                                GL_MDET_TX_PQM_VF_NUM_S;
 926                u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >>
 927                                GL_MDET_TX_PQM_MAL_TYPE_S;
 928                u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >>
 929                                GL_MDET_TX_PQM_QNUM_S);
 930
 931                if (netif_msg_tx_err(pf))
 932                        dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
 933                                 event, queue, pf_num, vf_num);
 934                wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
 935                mdd_detected = true;
 936        }
 937
 938        reg = rd32(hw, GL_MDET_TX_TCLAN);
 939        if (reg & GL_MDET_TX_TCLAN_VALID_M) {
 940                u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >>
 941                                GL_MDET_TX_TCLAN_PF_NUM_S;
 942                u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >>
 943                                GL_MDET_TX_TCLAN_VF_NUM_S;
 944                u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >>
 945                                GL_MDET_TX_TCLAN_MAL_TYPE_S;
 946                u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >>
 947                                GL_MDET_TX_TCLAN_QNUM_S);
 948
  949                if (netif_msg_tx_err(pf))
 950                        dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
 951                                 event, queue, pf_num, vf_num);
 952                wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
 953                mdd_detected = true;
 954        }
 955
 956        reg = rd32(hw, GL_MDET_RX);
 957        if (reg & GL_MDET_RX_VALID_M) {
 958                u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >>
 959                                GL_MDET_RX_PF_NUM_S;
 960                u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >>
 961                                GL_MDET_RX_VF_NUM_S;
 962                u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >>
 963                                GL_MDET_RX_MAL_TYPE_S;
 964                u16 queue = ((reg & GL_MDET_RX_QNUM_M) >>
 965                                GL_MDET_RX_QNUM_S);
 966
 967                if (netif_msg_rx_err(pf))
 968                        dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
 969                                 event, queue, pf_num, vf_num);
 970                wr32(hw, GL_MDET_RX, 0xffffffff);
 971                mdd_detected = true;
 972        }
 973
 974        if (mdd_detected) {
 975                bool pf_mdd_detected = false;
 976
 977                reg = rd32(hw, PF_MDET_TX_PQM);
 978                if (reg & PF_MDET_TX_PQM_VALID_M) {
 979                        wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
 980                        dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n");
 981                        pf_mdd_detected = true;
 982                }
 983
 984                reg = rd32(hw, PF_MDET_TX_TCLAN);
 985                if (reg & PF_MDET_TX_TCLAN_VALID_M) {
 986                        wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF);
 987                        dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n");
 988                        pf_mdd_detected = true;
 989                }
 990
 991                reg = rd32(hw, PF_MDET_RX);
 992                if (reg & PF_MDET_RX_VALID_M) {
 993                        wr32(hw, PF_MDET_RX, 0xFFFF);
 994                        dev_info(&pf->pdev->dev, "RX driver issue detected, PF reset issued\n");
 995                        pf_mdd_detected = true;
 996                }
  997                /* Queue belongs to the PF, initiate a reset */
 998                if (pf_mdd_detected) {
 999                        set_bit(__ICE_NEEDS_RESTART, pf->state);
1000                        ice_service_task_schedule(pf);
1001                }
1002        }
1003
1004        /* see if one of the VFs needs to be reset */
1005        for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
1006                struct ice_vf *vf = &pf->vf[i];
1007
1008                reg = rd32(hw, VP_MDET_TX_PQM(i));
1009                if (reg & VP_MDET_TX_PQM_VALID_M) {
1010                        wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF);
1011                        vf->num_mdd_events++;
1012                        dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
1013                                 i);
1014                }
1015
1016                reg = rd32(hw, VP_MDET_TX_TCLAN(i));
1017                if (reg & VP_MDET_TX_TCLAN_VALID_M) {
1018                        wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF);
1019                        vf->num_mdd_events++;
1020                        dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
1021                                 i);
1022                }
1023
1024                reg = rd32(hw, VP_MDET_TX_TDPU(i));
1025                if (reg & VP_MDET_TX_TDPU_VALID_M) {
1026                        wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF);
1027                        vf->num_mdd_events++;
1028                        dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
1029                                 i);
1030                }
1031
1032                reg = rd32(hw, VP_MDET_RX(i));
1033                if (reg & VP_MDET_RX_VALID_M) {
1034                        wr32(hw, VP_MDET_RX(i), 0xFFFF);
1035                        vf->num_mdd_events++;
1036                        dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
1037                                 i);
1038                }
1039
1040                if (vf->num_mdd_events > ICE_DFLT_NUM_MDD_EVENTS_ALLOWED) {
1041                        dev_info(&pf->pdev->dev,
1042                                 "Too many MDD events on VF %d, disabled\n", i);
1043                        dev_info(&pf->pdev->dev,
1044                                 "Use PF Control I/F to re-enable the VF\n");
1045                        set_bit(ICE_VF_STATE_DIS, vf->vf_states);
1046                }
1047        }
1048
1049        /* re-enable MDD interrupt cause */
1050        clear_bit(__ICE_MDD_EVENT_PENDING, pf->state);
1051        reg = rd32(hw, PFINT_OICR_ENA);
1052        reg |= PFINT_OICR_MAL_DETECT_M;
1053        wr32(hw, PFINT_OICR_ENA, reg);
1054        ice_flush(hw);
1055}
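/* Field-decode pattern used throughout the handler above (illustrative
 * numbers): a field is extracted as (reg & FIELD_M) >> FIELD_S, where
 * FIELD_M selects the field's bits and FIELD_S is its lowest bit position.
 * For a hypothetical 4-bit field in bits 7:4 of reg = 0xA5:
 * (0xA5 & 0xF0) >> 4 = 0xA.
 */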
1056
1057/**
1058 * ice_service_task - manage and run subtasks
1059 * @work: pointer to work_struct contained by the PF struct
1060 */
1061static void ice_service_task(struct work_struct *work)
1062{
1063        struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
1064        unsigned long start_time = jiffies;
1065
1066        /* subtasks */
1067
1068        /* process reset requests first */
1069        ice_reset_subtask(pf);
1070
1071        /* bail if a reset/recovery cycle is pending or rebuild failed */
1072        if (ice_is_reset_in_progress(pf->state) ||
1073            test_bit(__ICE_SUSPENDED, pf->state) ||
1074            test_bit(__ICE_NEEDS_RESTART, pf->state)) {
1075                ice_service_task_complete(pf);
1076                return;
1077        }
1078
1079        ice_check_for_hang_subtask(pf);
1080        ice_sync_fltr_subtask(pf);
1081        ice_handle_mdd_event(pf);
1082        ice_process_vflr_event(pf);
1083        ice_watchdog_subtask(pf);
1084        ice_clean_adminq_subtask(pf);
1085        ice_clean_mailboxq_subtask(pf);
1086
1087        /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
1088        ice_service_task_complete(pf);
1089
1090        /* If the tasks have taken longer than one service timer period
1091         * or there is more work to be done, reset the service timer to
1092         * schedule the service task now.
1093         */
1094        if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
1095            test_bit(__ICE_MDD_EVENT_PENDING, pf->state) ||
1096            test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
1097            test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
1098            test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
1099                mod_timer(&pf->serv_tmr, jiffies);
1100}
1101
1102/**
1103 * ice_set_ctrlq_len - helper function to set controlq length
1104 * @hw: pointer to the hw instance
1105 */
1106static void ice_set_ctrlq_len(struct ice_hw *hw)
1107{
1108        hw->adminq.num_rq_entries = ICE_AQ_LEN;
1109        hw->adminq.num_sq_entries = ICE_AQ_LEN;
1110        hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
1111        hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
1112        hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN;
1113        hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN;
1114        hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
1115        hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
1116}
1117
1118/**
1119 * ice_irq_affinity_notify - Callback for affinity changes
1120 * @notify: context as to what irq was changed
1121 * @mask: the new affinity mask
1122 *
1123 * This is a callback function used by the irq_set_affinity_notifier function
1124 * so that we may register to receive changes to the irq affinity masks.
1125 */
1126static void ice_irq_affinity_notify(struct irq_affinity_notify *notify,
1127                                    const cpumask_t *mask)
1128{
1129        struct ice_q_vector *q_vector =
1130                container_of(notify, struct ice_q_vector, affinity_notify);
1131
1132        cpumask_copy(&q_vector->affinity_mask, mask);
1133}
1134
1135/**
1136 * ice_irq_affinity_release - Callback for affinity notifier release
1137 * @ref: internal core kernel usage
1138 *
1139 * This is a callback function used by the irq_set_affinity_notifier function
1140 * to inform the current notification subscriber that they will no longer
1141 * receive notifications.
1142 */
1143static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
1144
1145/**
1146 * ice_vsi_ena_irq - Enable IRQ for the given VSI
1147 * @vsi: the VSI being configured
1148 */
1149static int ice_vsi_ena_irq(struct ice_vsi *vsi)
1150{
1151        struct ice_pf *pf = vsi->back;
1152        struct ice_hw *hw = &pf->hw;
1153
1154        if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
1155                int i;
1156
1157                for (i = 0; i < vsi->num_q_vectors; i++)
1158                        ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
1159        }
1160
1161        ice_flush(hw);
1162        return 0;
1163}
1164
1165/**
1166 * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
1167 * @vsi: the VSI being configured
1168 * @basename: name for the vector
1169 */
1170static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
1171{
1172        int q_vectors = vsi->num_q_vectors;
1173        struct ice_pf *pf = vsi->back;
1174        int base = vsi->sw_base_vector;
1175        int rx_int_idx = 0;
1176        int tx_int_idx = 0;
1177        int vector, err;
1178        int irq_num;
1179
1180        for (vector = 0; vector < q_vectors; vector++) {
1181                struct ice_q_vector *q_vector = vsi->q_vectors[vector];
1182
1183                irq_num = pf->msix_entries[base + vector].vector;
1184
1185                if (q_vector->tx.ring && q_vector->rx.ring) {
1186                        snprintf(q_vector->name, sizeof(q_vector->name) - 1,
1187                                 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
1188                        tx_int_idx++;
1189                } else if (q_vector->rx.ring) {
1190                        snprintf(q_vector->name, sizeof(q_vector->name) - 1,
1191                                 "%s-%s-%d", basename, "rx", rx_int_idx++);
1192                } else if (q_vector->tx.ring) {
1193                        snprintf(q_vector->name, sizeof(q_vector->name) - 1,
1194                                 "%s-%s-%d", basename, "tx", tx_int_idx++);
1195                } else {
1196                        /* skip this unused q_vector */
1197                        continue;
1198                }
1199                err = devm_request_irq(&pf->pdev->dev,
1200                                       pf->msix_entries[base + vector].vector,
1201                                       vsi->irq_handler, 0, q_vector->name,
1202                                       q_vector);
1203                if (err) {
1204                        netdev_err(vsi->netdev,
1205                                   "MSIX request_irq failed, error: %d\n", err);
1206                        goto free_q_irqs;
1207                }
1208
1209                /* register for affinity change notifications */
1210                q_vector->affinity_notify.notify = ice_irq_affinity_notify;
1211                q_vector->affinity_notify.release = ice_irq_affinity_release;
1212                irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
1213
1214                /* assign the mask for this irq */
1215                irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
1216        }
1217
1218        vsi->irqs_ready = true;
1219        return 0;
1220
1221free_q_irqs:
1222        while (vector) {
1223                vector--;
 1224                irq_num = pf->msix_entries[base + vector].vector;
 1225                irq_set_affinity_notifier(irq_num, NULL);
 1226                irq_set_affinity_hint(irq_num, NULL);
 1227                devm_free_irq(&pf->pdev->dev, irq_num, vsi->q_vectors[vector]);
1228        }
1229        return err;
1230}
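/* Naming sketch for the vectors requested above: with a hypothetical
 * basename of "ice-eth0" and combined Tx/Rx rings on every vector, the IRQs
 * appear in /proc/interrupts as ice-eth0-TxRx-0, ice-eth0-TxRx-1, and so on;
 * only rx_int_idx advances in the combined case, while tx_int_idx is kept in
 * step for the Tx-only naming path.
 */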
1231
1232/**
1233 * ice_ena_misc_vector - enable the non-queue interrupts
1234 * @pf: board private structure
1235 */
1236static void ice_ena_misc_vector(struct ice_pf *pf)
1237{
1238        struct ice_hw *hw = &pf->hw;
1239        u32 val;
1240
1241        /* clear things first */
1242        wr32(hw, PFINT_OICR_ENA, 0);    /* disable all */
1243        rd32(hw, PFINT_OICR);           /* read to clear */
1244
1245        val = (PFINT_OICR_ECC_ERR_M |
1246               PFINT_OICR_MAL_DETECT_M |
1247               PFINT_OICR_GRST_M |
1248               PFINT_OICR_PCI_EXCEPTION_M |
1249               PFINT_OICR_VFLR_M |
1250               PFINT_OICR_HMC_ERR_M |
1251               PFINT_OICR_PE_CRITERR_M);
1252
1253        wr32(hw, PFINT_OICR_ENA, val);
1254
1255        /* SW_ITR_IDX = 0, but don't change INTENA */
1256        wr32(hw, GLINT_DYN_CTL(pf->hw_oicr_idx),
1257             GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
1258}
1259
1260/**
1261 * ice_misc_intr - misc interrupt handler
1262 * @irq: interrupt number
1263 * @data: pointer to a q_vector
1264 */
1265static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
1266{
1267        struct ice_pf *pf = (struct ice_pf *)data;
1268        struct ice_hw *hw = &pf->hw;
1269        irqreturn_t ret = IRQ_NONE;
1270        u32 oicr, ena_mask;
1271
1272        set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
1273        set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state);
1274
1275        oicr = rd32(hw, PFINT_OICR);
1276        ena_mask = rd32(hw, PFINT_OICR_ENA);
1277
1278        if (oicr & PFINT_OICR_MAL_DETECT_M) {
1279                ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
1280                set_bit(__ICE_MDD_EVENT_PENDING, pf->state);
1281        }
1282        if (oicr & PFINT_OICR_VFLR_M) {
1283                ena_mask &= ~PFINT_OICR_VFLR_M;
1284                set_bit(__ICE_VFLR_EVENT_PENDING, pf->state);
1285        }
1286
1287        if (oicr & PFINT_OICR_GRST_M) {
1288                u32 reset;
1289
1290                /* we have a reset warning */
1291                ena_mask &= ~PFINT_OICR_GRST_M;
1292                reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1293                        GLGEN_RSTAT_RESET_TYPE_S;
1294
1295                if (reset == ICE_RESET_CORER)
1296                        pf->corer_count++;
1297                else if (reset == ICE_RESET_GLOBR)
1298                        pf->globr_count++;
1299                else if (reset == ICE_RESET_EMPR)
1300                        pf->empr_count++;
1301                else
1302                        dev_dbg(&pf->pdev->dev, "Invalid reset type %d\n",
1303                                reset);
1304
1305                /* If a reset cycle isn't already in progress, we set a bit in
1306                 * pf->state so that the service task can start a reset/rebuild.
1307                 * We also make note of which reset happened so that peer
1308                 * devices/drivers can be informed.
1309                 */
1310                if (!test_and_set_bit(__ICE_RESET_OICR_RECV, pf->state)) {
1311                        if (reset == ICE_RESET_CORER)
1312                                set_bit(__ICE_CORER_RECV, pf->state);
1313                        else if (reset == ICE_RESET_GLOBR)
1314                                set_bit(__ICE_GLOBR_RECV, pf->state);
1315                        else
1316                                set_bit(__ICE_EMPR_RECV, pf->state);
1317
 1318                        /* There are a couple of different bits at play here.
1319                         * hw->reset_ongoing indicates whether the hardware is
1320                         * in reset. This is set to true when a reset interrupt
1321                         * is received and set back to false after the driver
1322                         * has determined that the hardware is out of reset.
1323                         *
1324                         * __ICE_RESET_OICR_RECV in pf->state indicates
1325                         * that a post reset rebuild is required before the
1326                         * driver is operational again. This is set above.
1327                         *
1328                         * As this is the start of the reset/rebuild cycle, set
1329                         * both to indicate that.
1330                         */
1331                        hw->reset_ongoing = true;
1332                }
1333        }
1334
1335        if (oicr & PFINT_OICR_HMC_ERR_M) {
1336                ena_mask &= ~PFINT_OICR_HMC_ERR_M;
1337                dev_dbg(&pf->pdev->dev,
1338                        "HMC Error interrupt - info 0x%x, data 0x%x\n",
1339                        rd32(hw, PFHMC_ERRORINFO),
1340                        rd32(hw, PFHMC_ERRORDATA));
1341        }
1342
1343        /* Report and mask off any remaining unexpected interrupts */
1344        oicr &= ena_mask;
1345        if (oicr) {
1346                dev_dbg(&pf->pdev->dev, "unhandled interrupt oicr=0x%08x\n",
1347                        oicr);
1348                /* If a critical error is pending there is no choice but to
1349                 * reset the device.
1350                 */
1351                if (oicr & (PFINT_OICR_PE_CRITERR_M |
1352                            PFINT_OICR_PCI_EXCEPTION_M |
1353                            PFINT_OICR_ECC_ERR_M)) {
1354                        set_bit(__ICE_PFR_REQ, pf->state);
1355                        ice_service_task_schedule(pf);
1356                }
1357                ena_mask &= ~oicr;
1358        }
1359        ret = IRQ_HANDLED;
1360
1361        /* re-enable interrupt causes that are not handled during this pass */
1362        wr32(hw, PFINT_OICR_ENA, ena_mask);
1363        if (!test_bit(__ICE_DOWN, pf->state)) {
1364                ice_service_task_schedule(pf);
1365                ice_irq_dynamic_ena(hw, NULL, NULL);
1366        }
1367
1368        return ret;
1369}
1370
1371/**
1372 * ice_dis_ctrlq_interrupts - disable control queue interrupts
1373 * @hw: pointer to HW structure
1374 */
1375static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
1376{
1377        /* disable Admin queue Interrupt causes */
1378        wr32(hw, PFINT_FW_CTL,
1379             rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);
1380
1381        /* disable Mailbox queue Interrupt causes */
1382        wr32(hw, PFINT_MBX_CTL,
1383             rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
1384
1385        /* disable Control queue Interrupt causes */
1386        wr32(hw, PFINT_OICR_CTL,
1387             rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
1388
1389        ice_flush(hw);
1390}
1391
1392/**
1393 * ice_free_irq_msix_misc - Unroll misc vector setup
1394 * @pf: board private structure
1395 */
1396static void ice_free_irq_msix_misc(struct ice_pf *pf)
1397{
1398        struct ice_hw *hw = &pf->hw;
1399
1400        ice_dis_ctrlq_interrupts(hw);
1401
1402        /* disable OICR interrupt */
1403        wr32(hw, PFINT_OICR_ENA, 0);
1404        ice_flush(hw);
1405
1406        if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) {
1407                synchronize_irq(pf->msix_entries[pf->sw_oicr_idx].vector);
1408                devm_free_irq(&pf->pdev->dev,
1409                              pf->msix_entries[pf->sw_oicr_idx].vector, pf);
1410        }
1411
1412        pf->num_avail_sw_msix += 1;
1413        ice_free_res(pf->sw_irq_tracker, pf->sw_oicr_idx, ICE_RES_MISC_VEC_ID);
1414        pf->num_avail_hw_msix += 1;
1415        ice_free_res(pf->hw_irq_tracker, pf->hw_oicr_idx, ICE_RES_MISC_VEC_ID);
1416}
1417
1418/**
1419 * ice_ena_ctrlq_interrupts - enable control queue interrupts
1420 * @hw: pointer to HW structure
1421 * @v_idx: HW vector index to associate the control queue interrupts with
1422 */
1423static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 v_idx)
1424{
1425        u32 val;
1426
1427        val = ((v_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
1428               PFINT_OICR_CTL_CAUSE_ENA_M);
1429        wr32(hw, PFINT_OICR_CTL, val);
1430
1431        /* enable Admin queue Interrupt causes */
1432        val = ((v_idx & PFINT_FW_CTL_MSIX_INDX_M) |
1433               PFINT_FW_CTL_CAUSE_ENA_M);
1434        wr32(hw, PFINT_FW_CTL, val);
1435
1436        /* enable Mailbox queue Interrupt causes */
1437        val = ((v_idx & PFINT_MBX_CTL_MSIX_INDX_M) |
1438               PFINT_MBX_CTL_CAUSE_ENA_M);
1439        wr32(hw, PFINT_MBX_CTL, val);
1440
1441        ice_flush(hw);
1442}
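
/* A worked example of the value composed above, assuming the conventional
 * layout of these registers (MSIX_INDX in the low bits, CAUSE_ENA as a
 * single high bit): with v_idx == 3,
 *
 *	val = (3 & PFINT_FW_CTL_MSIX_INDX_M) | PFINT_FW_CTL_CAUSE_ENA_M;
 *	wr32(hw, PFINT_FW_CTL, val);
 *
 * tells the device "signal Admin queue events on MSI-X vector 3, and
 * enable that cause"; any ITR index field is left at zero.
 */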
1443
1444/**
1445 * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
1446 * @pf: board private structure
1447 *
1448 * This sets up the handler for MSI-X vector 0, which is used to manage
1449 * non-queue interrupts, e.g. AdminQ events and errors. It is not used
1450 * when in MSI or legacy interrupt mode.
1451 */
1452static int ice_req_irq_msix_misc(struct ice_pf *pf)
1453{
1454        struct ice_hw *hw = &pf->hw;
1455        int oicr_idx, err = 0;
1456
1457        if (!pf->int_name[0])
1458                snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
1459                         dev_driver_string(&pf->pdev->dev),
1460                         dev_name(&pf->pdev->dev));
1461
1462        /* Do not request the IRQ, but do enable the OICR interrupt, since the
1463         * settings are lost during reset. Note that this function is called
1464         * only during the rebuild path and not while a reset is in progress.
1465         */
1466        if (ice_is_reset_in_progress(pf->state))
1467                goto skip_req_irq;
1468
1469        /* reserve one vector in sw_irq_tracker for misc interrupts */
1470        oicr_idx = ice_get_res(pf, pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
1471        if (oicr_idx < 0)
1472                return oicr_idx;
1473
1474        pf->num_avail_sw_msix -= 1;
1475        pf->sw_oicr_idx = oicr_idx;
1476
1477        /* reserve one vector in hw_irq_tracker for misc interrupts */
1478        oicr_idx = ice_get_res(pf, pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
1479        if (oicr_idx < 0) {
1480                ice_free_res(pf->sw_irq_tracker, pf->sw_oicr_idx, ICE_RES_MISC_VEC_ID);
1481                pf->num_avail_sw_msix += 1;
1482                return oicr_idx;
1483        }
1484        pf->num_avail_hw_msix -= 1;
1485        pf->hw_oicr_idx = oicr_idx;
1486
1487        err = devm_request_irq(&pf->pdev->dev,
1488                               pf->msix_entries[pf->sw_oicr_idx].vector,
1489                               ice_misc_intr, 0, pf->int_name, pf);
1490        if (err) {
1491                dev_err(&pf->pdev->dev,
1492                        "devm_request_irq for %s failed: %d\n",
1493                        pf->int_name, err);
1494                ice_free_res(pf->sw_irq_tracker, pf->sw_oicr_idx, ICE_RES_MISC_VEC_ID);
1495                pf->num_avail_sw_msix += 1;
1496                ice_free_res(pf->hw_irq_tracker, pf->hw_oicr_idx, ICE_RES_MISC_VEC_ID);
1497                pf->num_avail_hw_msix += 1;
1498                return err;
1499        }
1500
1501skip_req_irq:
1502        ice_ena_misc_vector(pf);
1503
1504        ice_ena_ctrlq_interrupts(hw, pf->hw_oicr_idx);
1505        wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->hw_oicr_idx),
1506             ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
1507
1508        ice_flush(hw);
1509        ice_irq_dynamic_ena(hw, NULL, NULL);
1510
1511        return 0;
1512}
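
/* A note on the GLINT_ITR write above: it programs an interrupt throttle
 * interval for the misc vector. A sketch of the arithmetic, assuming the
 * ITR setting is an interval in microseconds and the register counts in
 * 2 us units (ICE_ITR_GRAN_S == 1):
 *
 *	ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S
 *	// align the interval, then convert us -> 2 us register units
 */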
1513
1514/**
1515 * ice_napi_del - Remove NAPI handler for the VSI
1516 * @vsi: VSI for which NAPI handler is to be removed
1517 */
1518void ice_napi_del(struct ice_vsi *vsi)
1519{
1520        int v_idx;
1521
1522        if (!vsi->netdev)
1523                return;
1524
1525        for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++)
1526                netif_napi_del(&vsi->q_vectors[v_idx]->napi);
1527}
1528
1529/**
1530 * ice_napi_add - register NAPI handler for the VSI
1531 * @vsi: VSI for which NAPI handler is to be registered
1532 *
1533 * This function is only called in the driver's load path. Registering the NAPI
1534 * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume,
1535 * reset/rebuild, etc.)
1536 */
1537static void ice_napi_add(struct ice_vsi *vsi)
1538{
1539        int v_idx;
1540
1541        if (!vsi->netdev)
1542                return;
1543
1544        for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++)
1545                netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
1546                               ice_napi_poll, NAPI_POLL_WEIGHT);
1547}
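
/* For reference, the poll callback registered above must follow the
 * standard NAPI contract. A minimal sketch (the real handler is
 * ice_napi_poll() in ice_txrx.c):
 *
 *	static int example_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work_done = 0;
 *
 *		// clean Tx, then receive up to 'budget' Rx packets
 *		if (work_done < budget)
 *			napi_complete_done(napi, work_done);
 *		return min(work_done, budget);
 *	}
 *
 * 'example_poll' is purely illustrative and not part of this driver.
 */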
1548
1549/**
1550 * ice_cfg_netdev - Allocate, configure and register a netdev
1551 * @vsi: the VSI associated with the new netdev
1552 *
1553 * Returns 0 on success, negative value on failure
1554 */
1555static int ice_cfg_netdev(struct ice_vsi *vsi)
1556{
1557        netdev_features_t csumo_features;
1558        netdev_features_t vlano_features;
1559        netdev_features_t dflt_features;
1560        netdev_features_t tso_features;
1561        struct ice_netdev_priv *np;
1562        struct net_device *netdev;
1563        u8 mac_addr[ETH_ALEN];
1564        int err;
1565
1566        netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
1567                                    vsi->alloc_rxq);
1568        if (!netdev)
1569                return -ENOMEM;
1570
1571        vsi->netdev = netdev;
1572        np = netdev_priv(netdev);
1573        np->vsi = vsi;
1574
1575        dflt_features = NETIF_F_SG      |
1576                        NETIF_F_HIGHDMA |
1577                        NETIF_F_RXHASH;
1578
1579        csumo_features = NETIF_F_RXCSUM   |
1580                         NETIF_F_IP_CSUM  |
1581                         NETIF_F_SCTP_CRC |
1582                         NETIF_F_IPV6_CSUM;
1583
1584        vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
1585                         NETIF_F_HW_VLAN_CTAG_TX     |
1586                         NETIF_F_HW_VLAN_CTAG_RX;
1587
1588        tso_features = NETIF_F_TSO;
1589
1590        /* set features that user can change */
1591        netdev->hw_features = dflt_features | csumo_features |
1592                              vlano_features | tso_features;
1593
1594        /* enable features */
1595        netdev->features |= netdev->hw_features;
1596        /* encap and VLAN devices inherit default, csumo and tso features */
1597        netdev->hw_enc_features |= dflt_features | csumo_features |
1598                                   tso_features;
1599        netdev->vlan_features |= dflt_features | csumo_features |
1600                                 tso_features;
1601
1602        if (vsi->type == ICE_VSI_PF) {
1603                SET_NETDEV_DEV(netdev, &vsi->back->pdev->dev);
1604                ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
1605
1606                ether_addr_copy(netdev->dev_addr, mac_addr);
1607                ether_addr_copy(netdev->perm_addr, mac_addr);
1608        }
1609
1610        netdev->priv_flags |= IFF_UNICAST_FLT;
1611
1612        /* assign netdev_ops */
1613        netdev->netdev_ops = &ice_netdev_ops;
1614
1615        /* set up the watchdog timeout value to be 5 seconds */
1616        netdev->watchdog_timeo = 5 * HZ;
1617
1618        ice_set_ethtool_ops(netdev);
1619
1620        netdev->extended->min_mtu = ETH_MIN_MTU;
1621        netdev->extended->max_mtu = ICE_MAX_MTU;
1622
1623        err = register_netdev(vsi->netdev);
1624        if (err)
1625                return err;
1626
1627        netif_carrier_off(vsi->netdev);
1628
1629        /* make sure transmit queues start off as stopped */
1630        netif_tx_stop_all_queues(vsi->netdev);
1631
1632        return 0;
1633}
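
/* The hw_features/features split above is what makes these offloads
 * user-togglable. For example,
 *
 *	# ethtool -K <ifname> rxvlan off
 *
 * asks the stack to clear NETIF_F_HW_VLAN_CTAG_RX from the requested
 * feature set, which then reaches ice_set_features() below; bits absent
 * from hw_features cannot be toggled at all.
 */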
1634
1635/**
1636 * ice_fill_rss_lut - Fill the RSS lookup table with default values
1637 * @lut: Lookup table
1638 * @rss_table_size: Lookup table size
1639 * @rss_size: number of Rx queues to spread the lookup table entries across
1640 */
1641void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
1642{
1643        u16 i;
1644
1645        for (i = 0; i < rss_table_size; i++)
1646                lut[i] = i % rss_size;
1647}
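
/* Example: with rss_table_size == 8 and rss_size == 3, the LUT becomes
 *
 *	{ 0, 1, 2, 0, 1, 2, 0, 1 }
 *
 * i.e. hash results are spread round-robin across the first three queues.
 */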
1648
1649/**
1650 * ice_pf_vsi_setup - Set up a PF VSI
1651 * @pf: board private structure
1652 * @pi: pointer to the port_info instance
1653 *
1654 * Returns pointer to the successfully allocated VSI sw struct on success,
1655 * otherwise returns NULL on failure.
1656 */
1657static struct ice_vsi *
1658ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
1659{
1660        return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID);
1661}
1662
1663/**
1664 * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
1665 * @netdev: network interface to be adjusted
1666 * @proto: unused protocol
1667 * @vid: VLAN ID to be added
1668 *
1669 * net_device_ops implementation for adding VLAN IDs
1670 */
1671static int ice_vlan_rx_add_vid(struct net_device *netdev,
1672                               __always_unused __be16 proto, u16 vid)
1673{
1674        struct ice_netdev_priv *np = netdev_priv(netdev);
1675        struct ice_vsi *vsi = np->vsi;
1676
1677        if (vid >= VLAN_N_VID) {
1678                netdev_err(netdev, "VLAN id requested %d is out of range %d\n",
1679                           vid, VLAN_N_VID);
1680                return -EINVAL;
1681        }
1682
1683        if (vsi->info.pvid)
1684                return -EINVAL;
1685
1686        /* Enable VLAN pruning when VLAN 0 is added */
1687        if (unlikely(!vid)) {
1688                int ret = ice_cfg_vlan_pruning(vsi, true);
1689
1690                if (ret)
1691                        return ret;
1692        }
1693
1694        /* Add all VLAN IDs including 0 to the switch filter. VLAN ID 0 is
1695         * needed to continue allowing all untagged packets since the VLAN
1696         * prune list is applied to all packets by the switch
1697         */
1698        return ice_vsi_add_vlan(vsi, vid);
1699}
1700
1701/**
1702 * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
1703 * @netdev: network interface to be adjusted
1704 * @proto: unused protocol
1705 * @vid: VLAN ID to be removed
1706 *
1707 * net_device_ops implementation for removing VLAN IDs
1708 */
1709static int ice_vlan_rx_kill_vid(struct net_device *netdev,
1710                                __always_unused __be16 proto, u16 vid)
1711{
1712        struct ice_netdev_priv *np = netdev_priv(netdev);
1713        struct ice_vsi *vsi = np->vsi;
1714        int status;
1715
1716        if (vsi->info.pvid)
1717                return -EINVAL;
1718
1719        /* Make sure ice_vsi_kill_vlan is successful before updating VLAN
1720         * information
1721         */
1722        status = ice_vsi_kill_vlan(vsi, vid);
1723        if (status)
1724                return status;
1725
1726        /* Disable VLAN pruning when VLAN 0 is removed */
1727        if (unlikely(!vid))
1728                status = ice_cfg_vlan_pruning(vsi, false);
1729
1730        return status;
1731}
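
/* Both VLAN callbacks above are invoked by the 8021q core. For example,
 *
 *	# ip link add link <ifname> name <ifname>.100 type vlan id 100
 *
 * ends up calling ndo_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), 100),
 * and deleting the VLAN device triggers ndo_vlan_rx_kill_vid() with the
 * same arguments.
 */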
1732
1733/**
1734 * ice_setup_pf_sw - Setup the HW switch on startup or after reset
1735 * @pf: board private structure
1736 *
1737 * Returns 0 on success, negative value on failure
1738 */
1739static int ice_setup_pf_sw(struct ice_pf *pf)
1740{
1741        LIST_HEAD(tmp_add_list);
1742        u8 broadcast[ETH_ALEN];
1743        struct ice_vsi *vsi;
1744        int status = 0;
1745
1746        if (ice_is_reset_in_progress(pf->state))
1747                return -EBUSY;
1748
1749        vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
1750        if (!vsi) {
1751                status = -ENOMEM;
1752                goto unroll_vsi_setup;
1753        }
1754
1755        status = ice_cfg_netdev(vsi);
1756        if (status) {
1757                status = -ENODEV;
1758                goto unroll_vsi_setup;
1759        }
1760
1761        /* registering the NAPI handler requires both the queues and
1762         * netdev to be created, which are done in ice_pf_vsi_setup()
1763         * and ice_cfg_netdev() respectively
1764         */
1765        ice_napi_add(vsi);
1766
1767        /* To add a MAC filter, first add the MAC to a list and then
1768         * pass the list to ice_add_mac.
1769         */
1770
1771        /* Add a unicast MAC filter so the VSI can get its packets */
1772        status = ice_add_mac_to_list(vsi, &tmp_add_list,
1773                                     vsi->port_info->mac.perm_addr);
1774        if (status)
1775                goto unroll_napi_add;
1776
1777        /* VSI needs to receive broadcast traffic, so add the broadcast
1778         * MAC address to the list as well.
1779         */
1780        eth_broadcast_addr(broadcast);
1781        status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast);
1782        if (status)
1783                goto free_mac_list;
1784
1785        /* program MAC filters for entries in tmp_add_list */
1786        status = ice_add_mac(&pf->hw, &tmp_add_list);
1787        if (status) {
1788                dev_err(&pf->pdev->dev, "Could not add MAC filters\n");
1789                status = -ENOMEM;
1790                goto free_mac_list;
1791        }
1792
1793        ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
1794        return status;
1795
1796free_mac_list:
1797        ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
1798
1799unroll_napi_add:
1800        if (vsi) {
1801                ice_napi_del(vsi);
1802                if (vsi->netdev) {
1803                        if (vsi->netdev->reg_state == NETREG_REGISTERED)
1804                                unregister_netdev(vsi->netdev);
1805                        free_netdev(vsi->netdev);
1806                        vsi->netdev = NULL;
1807                }
1808        }
1809
1810unroll_vsi_setup:
1811        if (vsi) {
1812                ice_vsi_free_q_vectors(vsi);
1813                ice_vsi_delete(vsi);
1814                ice_vsi_put_qs(vsi);
1815                pf->q_left_tx += vsi->alloc_txq;
1816                pf->q_left_rx += vsi->alloc_rxq;
1817                ice_vsi_clear(vsi);
1818        }
1819        return status;
1820}
1821
1822/**
1823 * ice_determine_q_usage - Calculate queue distribution
1824 * @pf: board private structure
1825 *
1826 * Split the available queues between LAN Tx/Rx use and the leftover pool.
1827 */
1828static void ice_determine_q_usage(struct ice_pf *pf)
1829{
1830        u16 q_left_tx, q_left_rx;
1831
1832        q_left_tx = pf->hw.func_caps.common_cap.num_txq;
1833        q_left_rx = pf->hw.func_caps.common_cap.num_rxq;
1834
1835        pf->num_lan_tx = min_t(int, q_left_tx, num_online_cpus());
1836
1837        /* only 1 Rx queue unless RSS is enabled */
1838        if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
1839                pf->num_lan_rx = 1;
1840        else
1841                pf->num_lan_rx = min_t(int, q_left_rx, num_online_cpus());
1842
1843        pf->q_left_tx = q_left_tx - pf->num_lan_tx;
1844        pf->q_left_rx = q_left_rx - pf->num_lan_rx;
1845}
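
/* A worked example: on a 16-CPU system whose function capabilities report
 * 64 Tx and 64 Rx queues, with RSS enabled:
 *
 *	num_lan_tx = min(64, 16) = 16;	q_left_tx = 64 - 16 = 48
 *	num_lan_rx = min(64, 16) = 16;	q_left_rx = 64 - 16 = 48
 *
 * With RSS disabled, num_lan_rx is pinned to 1 instead.
 */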
1846
1847/**
1848 * ice_deinit_pf - Unrolls initializations done by ice_init_pf
1849 * @pf: board private structure to initialize
1850 */
1851static void ice_deinit_pf(struct ice_pf *pf)
1852{
1853        ice_service_task_stop(pf);
1854        mutex_destroy(&pf->sw_mutex);
1855        mutex_destroy(&pf->avail_q_mutex);
1856}
1857
1858/**
1859 * ice_init_pf - Initialize general software structures (struct ice_pf)
1860 * @pf: board private structure to initialize
1861 */
1862static void ice_init_pf(struct ice_pf *pf)
1863{
1864        bitmap_zero(pf->flags, ICE_PF_FLAGS_NBITS);
1865        set_bit(ICE_FLAG_MSIX_ENA, pf->flags);
1866#ifdef CONFIG_PCI_IOV
1867        if (pf->hw.func_caps.common_cap.sr_iov_1_1) {
1868                struct ice_hw *hw = &pf->hw;
1869
1870                set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
1871                pf->num_vfs_supported = min_t(int, hw->func_caps.num_allocd_vfs,
1872                                              ICE_MAX_VF_COUNT);
1873        }
1874#endif /* CONFIG_PCI_IOV */
1875
1876        mutex_init(&pf->sw_mutex);
1877        mutex_init(&pf->avail_q_mutex);
1878
1879        /* Clear avail_[t|r]x_qs bitmaps (set all to avail) */
1880        mutex_lock(&pf->avail_q_mutex);
1881        bitmap_zero(pf->avail_txqs, ICE_MAX_TXQS);
1882        bitmap_zero(pf->avail_rxqs, ICE_MAX_RXQS);
1883        mutex_unlock(&pf->avail_q_mutex);
1884
1885        if (pf->hw.func_caps.common_cap.rss_table_size)
1886                set_bit(ICE_FLAG_RSS_ENA, pf->flags);
1887
1888        /* setup service timer and periodic service task */
1889        timer_setup(&pf->serv_tmr, ice_service_timer, 0);
1890        pf->serv_tmr_period = HZ;
1891        INIT_WORK(&pf->serv_task, ice_service_task);
1892        clear_bit(__ICE_SERVICE_SCHED, pf->state);
1893}
1894
1895/**
1896 * ice_ena_msix_range - Request a range of MSI-X vectors from the OS
1897 * @pf: board private structure
1898 *
1899 * Compute the number of MSI-X vectors required (v_budget) and request them
1900 * from the OS. Return the number of vectors reserved, or negative on failure.
1901 */
1902static int ice_ena_msix_range(struct ice_pf *pf)
1903{
1904        int v_left, v_actual, v_budget = 0;
1905        int needed, err, i;
1906
1907        v_left = pf->hw.func_caps.common_cap.num_msix_vectors;
1908
1909        /* reserve one vector for miscellaneous handler */
1910        needed = 1;
1911        v_budget += needed;
1912        v_left -= needed;
1913
1914        /* reserve vectors for LAN traffic */
1915        pf->num_lan_msix = min_t(int, num_online_cpus(), v_left);
1916        v_budget += pf->num_lan_msix;
1917        v_left -= pf->num_lan_msix;
1918
1919        pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget,
1920                                        sizeof(*pf->msix_entries), GFP_KERNEL);
1921
1922        if (!pf->msix_entries) {
1923                err = -ENOMEM;
1924                goto exit_err;
1925        }
1926
1927        for (i = 0; i < v_budget; i++)
1928                pf->msix_entries[i].entry = i;
1929
1930        /* actually reserve the vectors */
1931        v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
1932                                         ICE_MIN_MSIX, v_budget);
1933
1934        if (v_actual < 0) {
1935                dev_err(&pf->pdev->dev, "unable to reserve MSI-X vectors\n");
1936                err = v_actual;
1937                goto msix_err;
1938        }
1939
1940        if (v_actual < v_budget) {
1941                dev_warn(&pf->pdev->dev,
1942                         "not enough vectors. requested = %d, obtained = %d\n",
1943                         v_budget, v_actual);
1944                if (v_actual >= (pf->num_lan_msix + 1)) {
1945                        pf->num_avail_sw_msix = v_actual -
1946                                                (pf->num_lan_msix + 1);
1947                } else if (v_actual >= 2) {
1948                        pf->num_lan_msix = 1;
1949                        pf->num_avail_sw_msix = v_actual - 2;
1950                } else {
1951                        pci_disable_msix(pf->pdev);
1952                        err = -ERANGE;
1953                        goto msix_err;
1954                }
1955        }
1956
1957        return v_actual;
1958
1959msix_err:
1960        devm_kfree(&pf->pdev->dev, pf->msix_entries);
1961        goto exit_err;
1962
1963exit_err:
1964        pf->num_lan_msix = 0;
1965        clear_bit(ICE_FLAG_MSIX_ENA, pf->flags);
1966        return err;
1967}
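
/* Example of the fallback tiers above: with 8 online CPUs, v_budget is
 * 1 (misc) + 8 (LAN) = 9. If the OS grants only 5 vectors:
 *
 *	5 >= 8 + 1?	no
 *	5 >= 2?		yes -> num_lan_msix = 1, num_avail_sw_msix = 5 - 2 = 3
 *
 * i.e. the driver falls back to a single LAN vector and keeps the rest in
 * the spare pool; fewer than two usable vectors fails with -ERANGE.
 */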
1968
1969/**
1970 * ice_dis_msix - Disable MSI-X interrupt setup in OS
1971 * @pf: board private structure
1972 */
1973static void ice_dis_msix(struct ice_pf *pf)
1974{
1975        pci_disable_msix(pf->pdev);
1976        devm_kfree(&pf->pdev->dev, pf->msix_entries);
1977        pf->msix_entries = NULL;
1978        clear_bit(ICE_FLAG_MSIX_ENA, pf->flags);
1979}
1980
1981/**
1982 * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
1983 * @pf: board private structure
1984 */
1985static void ice_clear_interrupt_scheme(struct ice_pf *pf)
1986{
1987        if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
1988                ice_dis_msix(pf);
1989
1990        if (pf->sw_irq_tracker) {
1991                devm_kfree(&pf->pdev->dev, pf->sw_irq_tracker);
1992                pf->sw_irq_tracker = NULL;
1993        }
1994
1995        if (pf->hw_irq_tracker) {
1996                devm_kfree(&pf->pdev->dev, pf->hw_irq_tracker);
1997                pf->hw_irq_tracker = NULL;
1998        }
1999}
2000
2001/**
2002 * ice_init_interrupt_scheme - Determine proper interrupt scheme
2003 * @pf: board private structure to initialize
2004 */
2005static int ice_init_interrupt_scheme(struct ice_pf *pf)
2006{
2007        int vectors = 0, hw_vectors = 0;
2008
2009        if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
2010                vectors = ice_ena_msix_range(pf);
2011        else
2012                return -ENODEV;
2013
2014        if (vectors < 0)
2015                return vectors;
2016
2017        /* set up vector assignment tracking */
2018        pf->sw_irq_tracker =
2019                devm_kzalloc(&pf->pdev->dev, sizeof(*pf->sw_irq_tracker) +
2020                             (sizeof(u16) * vectors), GFP_KERNEL);
2021        if (!pf->sw_irq_tracker) {
2022                ice_dis_msix(pf);
2023                return -ENOMEM;
2024        }
2025
2026        /* populate the SW interrupt pool with the number of OS-granted IRQs */
2027        pf->num_avail_sw_msix = vectors;
2028        pf->sw_irq_tracker->num_entries = vectors;
2029
2030        /* set up HW vector assignment tracking */
2031        hw_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
2032        pf->hw_irq_tracker =
2033                devm_kzalloc(&pf->pdev->dev, sizeof(*pf->hw_irq_tracker) +
2034                             (sizeof(u16) * hw_vectors), GFP_KERNEL);
2035        if (!pf->hw_irq_tracker) {
2036                ice_clear_interrupt_scheme(pf);
2037                return -ENOMEM;
2038        }
2039
2040        /* populate the HW interrupt pool with the number of HW-supported IRQs */
2041        pf->num_avail_hw_msix = hw_vectors;
2042        pf->hw_irq_tracker->num_entries = hw_vectors;
2043
2044        return 0;
2045}
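
/* The trackers above are allocated with a trailing u16 array holding one
 * entry per vector. A sketch of the assumed shape (see the real definition
 * of struct ice_res_tracker in ice.h):
 *
 *	struct ice_res_tracker {
 *		u16 num_entries;
 *		...
 *		u16 list[1];	// grows to 'vectors' entries
 *	};
 *
 * which is why the allocation size is sizeof(*tracker) plus
 * vectors * sizeof(u16).
 */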
2046
2047/**
2048 * ice_verify_itr_gran - verify driver's assumption of ITR granularity
2049 * @pf: pointer to the PF structure
2050 *
2051 * There is no error returned here because the driver will be able to handle a
2052 * different ITR granularity, but interrupt moderation will not be accurate if
2053 * the driver's assumptions are not verified. This assumption is made so we can
2054 * use constants in the hot path instead of accessing structure members.
2055 */
2056static void ice_verify_itr_gran(struct ice_pf *pf)
2057{
2058        if (pf->hw.itr_gran != (ICE_ITR_GRAN_S << 1))
2059                dev_warn(&pf->pdev->dev,
2060                         "%d ITR granularity assumption is invalid, actual ITR granularity is %d. Interrupt moderation will be inaccurate!\n",
2061                         (ICE_ITR_GRAN_S << 1), pf->hw.itr_gran);
2062}
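
/* Numerically: assuming ICE_ITR_GRAN_S == 1, as in this driver's headers,
 * the check expects pf->hw.itr_gran == (1 << 1) == 2, i.e. a 2 us ITR
 * granularity. Any other value only triggers the warning; operation
 * continues with less accurate interrupt moderation.
 */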
2063
2064/**
2065 * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
2066 * @pf: pointer to the PF structure
2067 *
2068 * There is no error returned here because the driver should be able to handle
2069 * 128 Byte cache lines, so we only print a warning in case issues are seen,
2070 * specifically with Tx.
2071 */
2072static void ice_verify_cacheline_size(struct ice_pf *pf)
2073{
2074        if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
2075                dev_warn(&pf->pdev->dev,
2076                         "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
2077                         ICE_CACHE_LINE_BYTES);
2078}
2079
2080/**
2081 * ice_probe - Device initialization routine
2082 * @pdev: PCI device information struct
2083 * @ent: entry in ice_pci_tbl
2084 *
2085 * Returns 0 on success, negative on failure
2086 */
2087static int ice_probe(struct pci_dev *pdev,
2088                     const struct pci_device_id __always_unused *ent)
2089{
2090        struct ice_pf *pf;
2091        struct ice_hw *hw;
2092        int err;
2093
2094        /* this driver uses devres, see Documentation/driver-model/devres.txt */
2095        err = pcim_enable_device(pdev);
2096        if (err)
2097                return err;
2098
2099        err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev));
2100        if (err) {
2101                dev_err(&pdev->dev, "BAR0 I/O map error %d\n", err);
2102                return err;
2103        }
2104
2105        pf = devm_kzalloc(&pdev->dev, sizeof(*pf), GFP_KERNEL);
2106        if (!pf)
2107                return -ENOMEM;
2108
2109        /* set up for high or low DMA */
2110        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2111        if (err)
2112                err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
2113        if (err) {
2114                dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err);
2115                return err;
2116        }
2117
2118        pci_enable_pcie_error_reporting(pdev);
2119        pci_set_master(pdev);
2120
2121        pf->pdev = pdev;
2122        pci_set_drvdata(pdev, pf);
2123        set_bit(__ICE_DOWN, pf->state);
2124        /* Disable service task until DOWN bit is cleared */
2125        set_bit(__ICE_SERVICE_DIS, pf->state);
2126
2127        hw = &pf->hw;
2128        hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
2129        hw->back = pf;
2130        hw->vendor_id = pdev->vendor;
2131        hw->device_id = pdev->device;
2132        pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
2133        hw->subsystem_vendor_id = pdev->subsystem_vendor;
2134        hw->subsystem_device_id = pdev->subsystem_device;
2135        hw->bus.device = PCI_SLOT(pdev->devfn);
2136        hw->bus.func = PCI_FUNC(pdev->devfn);
2137        ice_set_ctrlq_len(hw);
2138
2139        pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
2140
2141#ifndef CONFIG_DYNAMIC_DEBUG
2142        if (debug < -1)
2143                hw->debug_mask = debug;
2144#endif
2145
2146        err = ice_init_hw(hw);
2147        if (err) {
2148                dev_err(&pdev->dev, "ice_init_hw failed: %d\n", err);
2149                err = -EIO;
2150                goto err_exit_unroll;
2151        }
2152
2153        dev_info(&pdev->dev, "firmware %d.%d.%05d api %d.%d\n",
2154                 hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
2155                 hw->api_maj_ver, hw->api_min_ver);
2156
2157        ice_init_pf(pf);
2158
2159        ice_determine_q_usage(pf);
2160
2161        pf->num_alloc_vsi = hw->func_caps.guar_num_vsi;
2162        if (!pf->num_alloc_vsi) {
2163                err = -EIO;
2164                goto err_init_pf_unroll;
2165        }
2166
2167        pf->vsi = devm_kcalloc(&pdev->dev, pf->num_alloc_vsi,
2168                               sizeof(*pf->vsi), GFP_KERNEL);
2169        if (!pf->vsi) {
2170                err = -ENOMEM;
2171                goto err_init_pf_unroll;
2172        }
2173
2174        err = ice_init_interrupt_scheme(pf);
2175        if (err) {
2176                dev_err(&pdev->dev,
2177                        "ice_init_interrupt_scheme failed: %d\n", err);
2178                err = -EIO;
2179                goto err_init_interrupt_unroll;
2180        }
2181
2182        /* Driver is mostly up */
2183        clear_bit(__ICE_DOWN, pf->state);
2184
2185        /* In the case of MSI-X we are going to set up the misc vector right
2186         * here to handle admin queue events etc. In the case of legacy and
2187         * MSI, the misc functionality and queue processing are combined in
2188         * the same vector, which gets set up at open.
2189         */
2190        if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
2191                err = ice_req_irq_msix_misc(pf);
2192                if (err) {
2193                        dev_err(&pdev->dev,
2194                                "setup of misc vector failed: %d\n", err);
2195                        goto err_init_interrupt_unroll;
2196                }
2197        }
2198
2199        /* create switch struct for the switch element created by FW on boot */
2200        pf->first_sw = devm_kzalloc(&pdev->dev, sizeof(*pf->first_sw),
2201                                    GFP_KERNEL);
2202        if (!pf->first_sw) {
2203                err = -ENOMEM;
2204                goto err_msix_misc_unroll;
2205        }
2206
2207        if (hw->evb_veb)
2208                pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
2209        else
2210                pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
2211
2212        pf->first_sw->pf = pf;
2213
2214        /* record the sw_id available for later use */
2215        pf->first_sw->sw_id = hw->port_info->sw_id;
2216
2217        err = ice_setup_pf_sw(pf);
2218        if (err) {
2219                dev_err(&pdev->dev,
2220                        "probe failed due to PF switch setup: %d\n", err);
2221                goto err_alloc_sw_unroll;
2222        }
2223
2224        clear_bit(__ICE_SERVICE_DIS, pf->state);
2225
2226        /* since everything is good, start the service timer */
2227        mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
2228
2229        ice_verify_cacheline_size(pf);
2230        ice_verify_itr_gran(pf);
2231
2232        return 0;
2233
2234err_alloc_sw_unroll:
2235        set_bit(__ICE_SERVICE_DIS, pf->state);
2236        set_bit(__ICE_DOWN, pf->state);
2237        devm_kfree(&pf->pdev->dev, pf->first_sw);
2238err_msix_misc_unroll:
2239        ice_free_irq_msix_misc(pf);
2240err_init_interrupt_unroll:
2241        ice_clear_interrupt_scheme(pf);
2242        devm_kfree(&pdev->dev, pf->vsi);
2243err_init_pf_unroll:
2244        ice_deinit_pf(pf);
2245        ice_deinit_hw(hw);
2246err_exit_unroll:
2247        pci_disable_pcie_error_reporting(pdev);
2248        return err;
2249}
2250
2251/**
2252 * ice_remove - Device removal routine
2253 * @pdev: PCI device information struct
2254 */
2255static void ice_remove(struct pci_dev *pdev)
2256{
2257        struct ice_pf *pf = pci_get_drvdata(pdev);
2258        int i;
2259
2260        if (!pf)
2261                return;
2262
2263        for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
2264                if (!ice_is_reset_in_progress(pf->state))
2265                        break;
2266                msleep(100);
2267        }
2268
2269        set_bit(__ICE_DOWN, pf->state);
2270        ice_service_task_stop(pf);
2271
2272        if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags))
2273                ice_free_vfs(pf);
2274        ice_vsi_release_all(pf);
2275        ice_free_irq_msix_misc(pf);
2276        ice_for_each_vsi(pf, i) {
2277                if (!pf->vsi[i])
2278                        continue;
2279                ice_vsi_free_q_vectors(pf->vsi[i]);
2280        }
2281        ice_clear_interrupt_scheme(pf);
2282        ice_deinit_pf(pf);
2283        ice_deinit_hw(&pf->hw);
2284        pci_disable_pcie_error_reporting(pdev);
2285}
2286
2287/**
2288 * ice_pci_err_detected - warning that PCI error has been detected
2289 * @pdev: PCI device information struct
2290 * @err: the type of PCI error
2291 *
2292 * Called to warn that something happened on the PCI bus and the error handling
2293 * is in progress.  Allows the driver to gracefully prepare/handle PCI errors.
2294 */
2295static pci_ers_result_t
2296ice_pci_err_detected(struct pci_dev *pdev, enum pci_channel_state err)
2297{
2298        struct ice_pf *pf = pci_get_drvdata(pdev);
2299
2300        if (!pf) {
2301                dev_err(&pdev->dev, "%s: unrecoverable device error %d\n",
2302                        __func__, err);
2303                return PCI_ERS_RESULT_DISCONNECT;
2304        }
2305
2306        if (!test_bit(__ICE_SUSPENDED, pf->state)) {
2307                ice_service_task_stop(pf);
2308
2309                if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) {
2310                        set_bit(__ICE_PFR_REQ, pf->state);
2311                        ice_prepare_for_reset(pf);
2312                }
2313        }
2314
2315        return PCI_ERS_RESULT_NEED_RESET;
2316}
2317
2318/**
2319 * ice_pci_err_slot_reset - a PCI slot reset has just happened
2320 * @pdev: PCI device information struct
2321 *
2322 * Called to determine if the driver can recover from the PCI slot reset,
2323 * using a register read to check whether the device is recoverable.
2324 */
2325static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
2326{
2327        struct ice_pf *pf = pci_get_drvdata(pdev);
2328        pci_ers_result_t result;
2329        int err;
2330        u32 reg;
2331
2332        err = pci_enable_device_mem(pdev);
2333        if (err) {
2334                dev_err(&pdev->dev,
2335                        "Cannot re-enable PCI device after reset, error %d\n",
2336                        err);
2337                result = PCI_ERS_RESULT_DISCONNECT;
2338        } else {
2339                pci_set_master(pdev);
2340                pci_restore_state(pdev);
2341                pci_save_state(pdev);
2342                pci_wake_from_d3(pdev, false);
2343
2344                /* Check for life */
2345                reg = rd32(&pf->hw, GLGEN_RTRIG);
2346                if (!reg)
2347                        result = PCI_ERS_RESULT_RECOVERED;
2348                else
2349                        result = PCI_ERS_RESULT_DISCONNECT;
2350        }
2351
2352        err = pci_cleanup_aer_uncorrect_error_status(pdev);
2353        if (err)
2354                dev_dbg(&pdev->dev,
2355                        "pci_cleanup_aer_uncorrect_error_status failed, error %d\n",
2356                        err);
2357        /* non-fatal, continue */
2358
2359        return result;
2360}
2361
2362/**
2363 * ice_pci_err_resume - restart operations after PCI error recovery
2364 * @pdev: PCI device information struct
2365 *
2366 * Called to allow the driver to bring things back up after PCI error and/or
2367 * reset recovery have finished
2368 */
2369static void ice_pci_err_resume(struct pci_dev *pdev)
2370{
2371        struct ice_pf *pf = pci_get_drvdata(pdev);
2372
2373        if (!pf) {
2374                dev_err(&pdev->dev,
2375                        "%s failed, device is unrecoverable\n", __func__);
2376                return;
2377        }
2378
2379        if (test_bit(__ICE_SUSPENDED, pf->state)) {
2380                dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n",
2381                        __func__);
2382                return;
2383        }
2384
2385        ice_do_reset(pf, ICE_RESET_PFR);
2386        ice_service_task_restart(pf);
2387        mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
2388}
2389
2390#if 0
2391/**
2392 * ice_pci_err_reset_prepare - prepare device driver for PCI reset
2393 * @pdev: PCI device information struct
2394 */
2395static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
2396{
2397        struct ice_pf *pf = pci_get_drvdata(pdev);
2398
2399        if (!test_bit(__ICE_SUSPENDED, pf->state)) {
2400                ice_service_task_stop(pf);
2401
2402                if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) {
2403                        set_bit(__ICE_PFR_REQ, pf->state);
2404                        ice_prepare_for_reset(pf);
2405                }
2406        }
2407}
2408
2409/**
2410 * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
2411 * @pdev: PCI device information struct
2412 */
2413static void ice_pci_err_reset_done(struct pci_dev *pdev)
2414{
2415        ice_pci_err_resume(pdev);
2416}
2417#endif
2418
2419/* ice_pci_tbl - PCI Device ID Table
2420 *
2421 * Wildcard entries (PCI_ANY_ID) should come last
2422 * Last entry must be all 0s
2423 *
2424 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
2425 *   Class, Class Mask, private data (not used) }
2426 */
2427static const struct pci_device_id ice_pci_tbl[] = {
2428        { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 },
2429        { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 },
2430        { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 },
2431        /* required last entry */
2432        { 0, }
2433};
2434MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
2435
2436static const struct pci_error_handlers ice_pci_err_handler = {
2437        .error_detected = ice_pci_err_detected,
2438        .slot_reset = ice_pci_err_slot_reset,
2439#if 0 /* RHEL-7 doesn't support these calls */
2440        .reset_prepare = ice_pci_err_reset_prepare,
2441        .reset_done = ice_pci_err_reset_done,
2442#endif
2443        .resume = ice_pci_err_resume
2444};
2445
2446static struct pci_driver ice_driver = {
2447        .name = KBUILD_MODNAME,
2448        .id_table = ice_pci_tbl,
2449        .probe = ice_probe,
2450        .remove = ice_remove,
2451        .sriov_configure = ice_sriov_configure,
2452        .err_handler = &ice_pci_err_handler
2453};
2454
2455/**
2456 * ice_module_init - Driver registration routine
2457 *
2458 * ice_module_init is the first routine called when the driver is
2459 * loaded. All it does is register with the PCI subsystem.
2460 */
2461static int __init ice_module_init(void)
2462{
2463        int status;
2464
2465        pr_info("%s - version %s\n", ice_driver_string, ice_drv_ver);
2466        pr_info("%s\n", ice_copyright);
2467
2468        ice_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, KBUILD_MODNAME);
2469        if (!ice_wq) {
2470                pr_err("Failed to create workqueue\n");
2471                return -ENOMEM;
2472        }
2473
2474        status = pci_register_driver(&ice_driver);
2475        if (status) {
2476                pr_err("failed to register pci driver, err %d\n", status);
2477                destroy_workqueue(ice_wq);
2478        }
2479
2480        return status;
2481}
2482module_init(ice_module_init);
2483
2484/**
2485 * ice_module_exit - Driver exit cleanup routine
2486 *
2487 * ice_module_exit is called just before the driver is removed
2488 * from memory.
2489 */
2490static void __exit ice_module_exit(void)
2491{
2492        pci_unregister_driver(&ice_driver);
2493        destroy_workqueue(ice_wq);
2494        pr_info("module unloaded\n");
2495}
2496module_exit(ice_module_exit);
2497
2498/**
2499 * ice_set_mac_address - NDO callback to set MAC address
2500 * @netdev: network interface device structure
2501 * @pi: pointer to an address structure
2502 *
2503 * Returns 0 on success, negative on failure
2504 */
2505static int ice_set_mac_address(struct net_device *netdev, void *pi)
2506{
2507        struct ice_netdev_priv *np = netdev_priv(netdev);
2508        struct ice_vsi *vsi = np->vsi;
2509        struct ice_pf *pf = vsi->back;
2510        struct ice_hw *hw = &pf->hw;
2511        struct sockaddr *addr = pi;
2512        enum ice_status status;
2513        LIST_HEAD(a_mac_list);
2514        LIST_HEAD(r_mac_list);
2515        u8 flags = 0;
2516        int err;
2517        u8 *mac;
2518
2519        mac = (u8 *)addr->sa_data;
2520
2521        if (!is_valid_ether_addr(mac))
2522                return -EADDRNOTAVAIL;
2523
2524        if (ether_addr_equal(netdev->dev_addr, mac)) {
2525                netdev_warn(netdev, "already using mac %pM\n", mac);
2526                return 0;
2527        }
2528
2529        if (test_bit(__ICE_DOWN, pf->state) ||
2530            ice_is_reset_in_progress(pf->state)) {
2531                netdev_err(netdev, "can't set mac %pM. device not ready\n",
2532                           mac);
2533                return -EBUSY;
2534        }
2535
2536        /* When we change the MAC address we also have to change the
2537         * MAC-address-based filter rules that were created previously for
2538         * the old address. So first we remove the old filter rule using
2539         * ice_remove_mac, and then create a new filter rule using
2540         * ice_add_mac. Note that both operations take a "list" of MAC
2541         * addresses (even though in this case there is only one MAC address
2542         * to add/remove), and this list is built using ice_add_mac_to_list.
2543         * Depending on the ensuing operation, this list of MAC addresses is
2544         * either added to or removed from the filter.
2545         */
2546        err = ice_add_mac_to_list(vsi, &r_mac_list, netdev->dev_addr);
2547        if (err) {
2548                err = -EADDRNOTAVAIL;
2549                goto free_lists;
2550        }
2551
2552        status = ice_remove_mac(hw, &r_mac_list);
2553        if (status) {
2554                err = -EADDRNOTAVAIL;
2555                goto free_lists;
2556        }
2557
2558        err = ice_add_mac_to_list(vsi, &a_mac_list, mac);
2559        if (err) {
2560                err = -EADDRNOTAVAIL;
2561                goto free_lists;
2562        }
2563
2564        status = ice_add_mac(hw, &a_mac_list);
2565        if (status) {
2566                err = -EADDRNOTAVAIL;
2567                goto free_lists;
2568        }
2569
2570free_lists:
2571        /* free list entries */
2572        ice_free_fltr_list(&pf->pdev->dev, &r_mac_list);
2573        ice_free_fltr_list(&pf->pdev->dev, &a_mac_list);
2574
2575        if (err) {
2576                netdev_err(netdev, "can't set mac %pM. filter update failed\n",
2577                           mac);
2578                return err;
2579        }
2580
2581        /* change the netdev's MAC address */
2582        memcpy(netdev->dev_addr, mac, netdev->addr_len);
2583        netdev_dbg(vsi->netdev, "updated mac address to %pM\n",
2584                   netdev->dev_addr);
2585
2586        /* write the new MAC address to the firmware */
2587        flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
2588        status = ice_aq_manage_mac_write(hw, mac, flags, NULL);
2589        if (status) {
2590                netdev_err(netdev, "can't set mac %pM. write to firmware failed.\n",
2591                           mac);
2592        }
2593        return 0;
2594}
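
/* This callback is reached through the core dev_set_mac_address() path,
 * for example via
 *
 *	# ip link set dev <ifname> address 00:11:22:33:44:55
 *
 * The core hands the new address down as a struct sockaddr, which is why
 * @pi is cast to one above.
 */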
2595
2596/**
2597 * ice_set_rx_mode - NDO callback to set the netdev filters
2598 * @netdev: network interface device structure
2599 */
2600static void ice_set_rx_mode(struct net_device *netdev)
2601{
2602        struct ice_netdev_priv *np = netdev_priv(netdev);
2603        struct ice_vsi *vsi = np->vsi;
2604
2605        if (!vsi)
2606                return;
2607
2608        /* Set the flags to synchronize filters.
2609         * ndo_set_rx_mode may be triggered even without a change in netdev
2610         * flags.
2611         */
2612        set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
2613        set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
2614        set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
2615
2616        /* schedule our worker thread which will take care of
2617         * applying the new filter changes
2618         */
2619        ice_service_task_schedule(vsi->back);
2620}
2621
2622/**
2623 * ice_fdb_add - add an entry to the hardware database
2624 * @ndm: the input from the stack
2625 * @tb: pointer to array of nladdr (unused)
2626 * @dev: the net device pointer
2627 * @addr: the MAC address entry being added
2628 * @vid: VLAN ID
2629 * @flags: instructions from stack about fdb operation
2630 */
2631static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
2632                       struct net_device *dev, const unsigned char *addr,
2633                       u16 vid, u16 flags)
2634{
2635        int err;
2636
2637        if (vid) {
2638                netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n");
2639                return -EINVAL;
2640        }
2641        if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
2642                netdev_err(dev, "FDB only supports static addresses\n");
2643                return -EINVAL;
2644        }
2645
2646        if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
2647                err = dev_uc_add_excl(dev, addr);
2648        else if (is_multicast_ether_addr(addr))
2649                err = dev_mc_add_excl(dev, addr);
2650        else
2651                err = -EINVAL;
2652
2653        /* Only return duplicate errors if NLM_F_EXCL is set */
2654        if (err == -EEXIST && !(flags & NLM_F_EXCL))
2655                err = 0;
2656
2657        return err;
2658}
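
/* The FDB callbacks are exercised from user space via rtnetlink, e.g.
 *
 *	# bridge fdb add 00:11:22:33:44:55 dev <ifname> self permanent
 *
 * rtnetlink decodes the message and calls ice_fdb_add() with the MAC
 * address; as enforced above, only static (NUD_PERMANENT) entries are
 * accepted.
 */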
2659
2660/**
2661 * ice_fdb_del - delete an entry from the hardware database
2662 * @ndm: the input from the stack
2663 * @tb: pointer to array of nladdr (unused)
2664 * @dev: the net device pointer
2665 * @addr: the MAC address entry being removed
2666 * @vid: VLAN ID
2667 */
2668static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
2669                       struct net_device *dev, const unsigned char *addr,
2670                       __always_unused u16 vid)
2671{
2672        int err;
2673
2674        if (ndm->ndm_state & NUD_PERMANENT) {
2675                netdev_err(dev, "FDB only supports static addresses\n");
2676                return -EINVAL;
2677        }
2678
2679        if (is_unicast_ether_addr(addr))
2680                err = dev_uc_del(dev, addr);
2681        else if (is_multicast_ether_addr(addr))
2682                err = dev_mc_del(dev, addr);
2683        else
2684                err = -EINVAL;
2685
2686        return err;
2687}
2688
2689/**
2690 * ice_set_features - set the netdev feature flags
2691 * @netdev: ptr to the netdev being adjusted
2692 * @features: the feature set that the stack is suggesting
2693 */
2694static int ice_set_features(struct net_device *netdev,
2695                            netdev_features_t features)
2696{
2697        struct ice_netdev_priv *np = netdev_priv(netdev);
2698        struct ice_vsi *vsi = np->vsi;
2699        int ret = 0;
2700
2701        if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH))
2702                ret = ice_vsi_manage_rss_lut(vsi, true);
2703        else if (!(features & NETIF_F_RXHASH) &&
2704                 netdev->features & NETIF_F_RXHASH)
2705                ret = ice_vsi_manage_rss_lut(vsi, false);
2706
2707        if ((features & NETIF_F_HW_VLAN_CTAG_RX) &&
2708            !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
2709                ret = ice_vsi_manage_vlan_stripping(vsi, true);
2710        else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) &&
2711                 (netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
2712                ret = ice_vsi_manage_vlan_stripping(vsi, false);
2713        else if ((features & NETIF_F_HW_VLAN_CTAG_TX) &&
2714                 !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
2715                ret = ice_vsi_manage_vlan_insertion(vsi);
2716        else if (!(features & NETIF_F_HW_VLAN_CTAG_TX) &&
2717                 (netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
2718                ret = ice_vsi_manage_vlan_insertion(vsi);
2719
2720        return ret;
2721}
2722
2723/**
2724 * ice_vsi_vlan_setup - Set up VLAN offload properties on a VSI
2725 * @vsi: VSI to set up VLAN properties for
2726 */
2727static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
2728{
2729        int ret = 0;
2730
2731        if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2732                ret = ice_vsi_manage_vlan_stripping(vsi, true);
2733        if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)
2734                ret = ice_vsi_manage_vlan_insertion(vsi);
2735
2736        return ret;
2737}
2738
2739/**
2740 * ice_vsi_cfg - Setup the VSI
2741 * @vsi: the VSI being configured
2742 *
2743 * Return 0 on success and negative value on error
2744 */
2745static int ice_vsi_cfg(struct ice_vsi *vsi)
2746{
2747        int err;
2748
2749        if (vsi->netdev) {
2750                ice_set_rx_mode(vsi->netdev);
2751
2752                err = ice_vsi_vlan_setup(vsi);
2753
2754                if (err)
2755                        return err;
2756        }
2757
2758        err = ice_vsi_cfg_lan_txqs(vsi);
2759        if (!err)
2760                err = ice_vsi_cfg_rxqs(vsi);
2761
2762        return err;
2763}
2764
2765/**
2766 * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
2767 * @vsi: the VSI being configured
2768 */
2769static void ice_napi_enable_all(struct ice_vsi *vsi)
2770{
2771        int q_idx;
2772
2773        if (!vsi->netdev)
2774                return;
2775
2776        for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
2777                struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
2778
2779                if (q_vector->rx.ring || q_vector->tx.ring)
2780                        napi_enable(&q_vector->napi);
2781        }
2782}
2783
2784/**
2785 * ice_up_complete - Finish the last steps of bringing up a connection
2786 * @vsi: The VSI being configured
2787 *
2788 * Return 0 on success and negative value on error
2789 */
2790static int ice_up_complete(struct ice_vsi *vsi)
2791{
2792        struct ice_pf *pf = vsi->back;
2793        int err;
2794
2795        if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
2796                ice_vsi_cfg_msix(vsi);
2797        else
2798                return -ENOTSUPP;
2799
2800        /* Enable only Rx rings, Tx rings were enabled by the FW when the
2801         * Tx queue group list was configured and the context bits were
2802         * programmed using ice_vsi_cfg_lan_txqs
2803         */
2804        err = ice_vsi_start_rx_rings(vsi);
2805        if (err)
2806                return err;
2807
2808        clear_bit(__ICE_DOWN, vsi->state);
2809        ice_napi_enable_all(vsi);
2810        ice_vsi_ena_irq(vsi);
2811
2812        if (vsi->port_info &&
2813            (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
2814            vsi->netdev) {
2815                ice_print_link_msg(vsi, true);
2816                netif_tx_start_all_queues(vsi->netdev);
2817                netif_carrier_on(vsi->netdev);
2818        }
2819
2820        ice_service_task_schedule(pf);
2821
2822        return err;
2823}
2824
2825/**
2826 * ice_up - Bring the connection back up after being down
2827 * @vsi: VSI being configured
2828 */
2829int ice_up(struct ice_vsi *vsi)
2830{
2831        int err;
2832
2833        err = ice_vsi_cfg(vsi);
2834        if (!err)
2835                err = ice_up_complete(vsi);
2836
2837        return err;
2838}
2839
2840/**
2841 * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
2842 * @ring: Tx or Rx ring to read stats from
2843 * @pkts: packets stats counter
2844 * @bytes: bytes stats counter
2845 *
2846 * This function fetches stats from the ring considering the atomic operations
2847 * that need to be performed to read u64 values on a 32-bit machine.
2848 */
2849static void ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts,
2850                                         u64 *bytes)
2851{
2852        unsigned int start;

2853        *pkts = 0;
2854        *bytes = 0;
2855
2856        if (!ring)
2857                return;
2858        do {
2859                start = u64_stats_fetch_begin_irq(&ring->syncp);
2860                *pkts = ring->stats.pkts;
2861                *bytes = ring->stats.bytes;
2862        } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
2863}
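
/* The matching writer side (in the Tx/Rx clean-up paths) brackets its
 * updates with the same seqcount, roughly:
 *
 *	u64_stats_update_begin(&ring->syncp);
 *	ring->stats.pkts += total_pkts;
 *	ring->stats.bytes += total_bytes;
 *	u64_stats_update_end(&ring->syncp);
 *
 * On 64-bit kernels the fetch_begin/retry pair compiles away; on 32-bit
 * it prevents readers from observing a torn 64-bit counter.
 */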
2864
2865/**
2866 * ice_update_vsi_ring_stats - Update VSI stats counters
2867 * @vsi: the VSI to be updated
2868 */
2869static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
2870{
2871        struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats;
2872        struct ice_ring *ring;
2873        u64 pkts, bytes;
2874        int i;
2875
2876        /* reset netdev stats */
2877        vsi_stats->tx_packets = 0;
2878        vsi_stats->tx_bytes = 0;
2879        vsi_stats->rx_packets = 0;
2880        vsi_stats->rx_bytes = 0;
2881
2882        /* reset non-netdev (extended) stats */
2883        vsi->tx_restart = 0;
2884        vsi->tx_busy = 0;
2885        vsi->tx_linearize = 0;
2886        vsi->rx_buf_failed = 0;
2887        vsi->rx_page_failed = 0;
2888
2889        rcu_read_lock();
2890
2891        /* update Tx rings counters */
2892        ice_for_each_txq(vsi, i) {
2893                ring = READ_ONCE(vsi->tx_rings[i]);
                if (!ring)
                        continue;
2894                ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
2895                vsi_stats->tx_packets += pkts;
2896                vsi_stats->tx_bytes += bytes;
2897                vsi->tx_restart += ring->tx_stats.restart_q;
2898                vsi->tx_busy += ring->tx_stats.tx_busy;
2899                vsi->tx_linearize += ring->tx_stats.tx_linearize;
2900        }
2901
2902        /* update Rx rings counters */
2903        ice_for_each_rxq(vsi, i) {
2904                ring = READ_ONCE(vsi->rx_rings[i]);
                if (!ring)
                        continue;
2905                ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
2906                vsi_stats->rx_packets += pkts;
2907                vsi_stats->rx_bytes += bytes;
2908                vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed;
2909                vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
2910        }
2911
2912        rcu_read_unlock();
2913}
2914
2915/**
2916 * ice_update_vsi_stats - Update VSI stats counters
2917 * @vsi: the VSI to be updated
2918 */
2919static void ice_update_vsi_stats(struct ice_vsi *vsi)
2920{
2921        struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
2922        struct ice_eth_stats *cur_es = &vsi->eth_stats;
2923        struct ice_pf *pf = vsi->back;
2924
2925        if (test_bit(__ICE_DOWN, vsi->state) ||
2926            test_bit(__ICE_CFG_BUSY, pf->state))
2927                return;
2928
2929        /* get stats as recorded by Tx/Rx rings */
2930        ice_update_vsi_ring_stats(vsi);
2931
2932        /* get VSI stats as recorded by the hardware */
2933        ice_update_eth_stats(vsi);
2934
2935        cur_ns->tx_errors = cur_es->tx_errors;
2936        cur_ns->rx_dropped = cur_es->rx_discards;
2937        cur_ns->tx_dropped = cur_es->tx_discards;
2938        cur_ns->multicast = cur_es->rx_multicast;
2939
2940        /* update some more netdev stats if this is main VSI */
2941        if (vsi->type == ICE_VSI_PF) {
2942                cur_ns->rx_crc_errors = pf->stats.crc_errors;
2943                cur_ns->rx_errors = pf->stats.crc_errors +
2944                                    pf->stats.illegal_bytes;
2945                cur_ns->rx_length_errors = pf->stats.rx_len_errors;
2946        }
2947}
2948
2949/**
2950 * ice_update_pf_stats - Update PF port stats counters
2951 * @pf: PF whose stats need to be updated
2952 */
2953static void ice_update_pf_stats(struct ice_pf *pf)
2954{
2955        struct ice_hw_port_stats *prev_ps, *cur_ps;
2956        struct ice_hw *hw = &pf->hw;
2957        u8 pf_id;
2958
2959        prev_ps = &pf->stats_prev;
2960        cur_ps = &pf->stats;
2961        pf_id = hw->pf_id;
2962
2963        ice_stat_update40(hw, GLPRT_GORCH(pf_id), GLPRT_GORCL(pf_id),
2964                          pf->stat_prev_loaded, &prev_ps->eth.rx_bytes,
2965                          &cur_ps->eth.rx_bytes);
2966
2967        ice_stat_update40(hw, GLPRT_UPRCH(pf_id), GLPRT_UPRCL(pf_id),
2968                          pf->stat_prev_loaded, &prev_ps->eth.rx_unicast,
2969                          &cur_ps->eth.rx_unicast);
2970
2971        ice_stat_update40(hw, GLPRT_MPRCH(pf_id), GLPRT_MPRCL(pf_id),
2972                          pf->stat_prev_loaded, &prev_ps->eth.rx_multicast,
2973                          &cur_ps->eth.rx_multicast);
2974
2975        ice_stat_update40(hw, GLPRT_BPRCH(pf_id), GLPRT_BPRCL(pf_id),
2976                          pf->stat_prev_loaded, &prev_ps->eth.rx_broadcast,
2977                          &cur_ps->eth.rx_broadcast);
2978
2979        ice_stat_update40(hw, GLPRT_GOTCH(pf_id), GLPRT_GOTCL(pf_id),
2980                          pf->stat_prev_loaded, &prev_ps->eth.tx_bytes,
2981                          &cur_ps->eth.tx_bytes);
2982
2983        ice_stat_update40(hw, GLPRT_UPTCH(pf_id), GLPRT_UPTCL(pf_id),
2984                          pf->stat_prev_loaded, &prev_ps->eth.tx_unicast,
2985                          &cur_ps->eth.tx_unicast);
2986
2987        ice_stat_update40(hw, GLPRT_MPTCH(pf_id), GLPRT_MPTCL(pf_id),
2988                          pf->stat_prev_loaded, &prev_ps->eth.tx_multicast,
2989                          &cur_ps->eth.tx_multicast);
2990
2991        ice_stat_update40(hw, GLPRT_BPTCH(pf_id), GLPRT_BPTCL(pf_id),
2992                          pf->stat_prev_loaded, &prev_ps->eth.tx_broadcast,
2993                          &cur_ps->eth.tx_broadcast);
2994
2995        ice_stat_update32(hw, GLPRT_TDOLD(pf_id), pf->stat_prev_loaded,
2996                          &prev_ps->tx_dropped_link_down,
2997                          &cur_ps->tx_dropped_link_down);
2998
2999        ice_stat_update40(hw, GLPRT_PRC64H(pf_id), GLPRT_PRC64L(pf_id),
3000                          pf->stat_prev_loaded, &prev_ps->rx_size_64,
3001                          &cur_ps->rx_size_64);
3002
3003        ice_stat_update40(hw, GLPRT_PRC127H(pf_id), GLPRT_PRC127L(pf_id),
3004                          pf->stat_prev_loaded, &prev_ps->rx_size_127,
3005                          &cur_ps->rx_size_127);
3006
3007        ice_stat_update40(hw, GLPRT_PRC255H(pf_id), GLPRT_PRC255L(pf_id),
3008                          pf->stat_prev_loaded, &prev_ps->rx_size_255,
3009                          &cur_ps->rx_size_255);
3010
3011        ice_stat_update40(hw, GLPRT_PRC511H(pf_id), GLPRT_PRC511L(pf_id),
3012                          pf->stat_prev_loaded, &prev_ps->rx_size_511,
3013                          &cur_ps->rx_size_511);
3014
3015        ice_stat_update40(hw, GLPRT_PRC1023H(pf_id),
3016                          GLPRT_PRC1023L(pf_id), pf->stat_prev_loaded,
3017                          &prev_ps->rx_size_1023, &cur_ps->rx_size_1023);
3018
3019        ice_stat_update40(hw, GLPRT_PRC1522H(pf_id),
3020                          GLPRT_PRC1522L(pf_id), pf->stat_prev_loaded,
3021                          &prev_ps->rx_size_1522, &cur_ps->rx_size_1522);
3022
3023        ice_stat_update40(hw, GLPRT_PRC9522H(pf_id),
3024                          GLPRT_PRC9522L(pf_id), pf->stat_prev_loaded,
3025                          &prev_ps->rx_size_big, &cur_ps->rx_size_big);
3026
3027        ice_stat_update40(hw, GLPRT_PTC64H(pf_id), GLPRT_PTC64L(pf_id),
3028                          pf->stat_prev_loaded, &prev_ps->tx_size_64,
3029                          &cur_ps->tx_size_64);
3030
3031        ice_stat_update40(hw, GLPRT_PTC127H(pf_id), GLPRT_PTC127L(pf_id),
3032                          pf->stat_prev_loaded, &prev_ps->tx_size_127,
3033                          &cur_ps->tx_size_127);
3034
3035        ice_stat_update40(hw, GLPRT_PTC255H(pf_id), GLPRT_PTC255L(pf_id),
3036                          pf->stat_prev_loaded, &prev_ps->tx_size_255,
3037                          &cur_ps->tx_size_255);
3038
3039        ice_stat_update40(hw, GLPRT_PTC511H(pf_id), GLPRT_PTC511L(pf_id),
3040                          pf->stat_prev_loaded, &prev_ps->tx_size_511,
3041                          &cur_ps->tx_size_511);
3042
3043        ice_stat_update40(hw, GLPRT_PTC1023H(pf_id),
3044                          GLPRT_PTC1023L(pf_id), pf->stat_prev_loaded,
3045                          &prev_ps->tx_size_1023, &cur_ps->tx_size_1023);
3046
3047        ice_stat_update40(hw, GLPRT_PTC1522H(pf_id),
3048                          GLPRT_PTC1522L(pf_id), pf->stat_prev_loaded,
3049                          &prev_ps->tx_size_1522, &cur_ps->tx_size_1522);
3050
3051        ice_stat_update40(hw, GLPRT_PTC9522H(pf_id),
3052                          GLPRT_PTC9522L(pf_id), pf->stat_prev_loaded,
3053                          &prev_ps->tx_size_big, &cur_ps->tx_size_big);
3054
3055        ice_stat_update32(hw, GLPRT_LXONRXC(pf_id), pf->stat_prev_loaded,
3056                          &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);
3057
3058        ice_stat_update32(hw, GLPRT_LXOFFRXC(pf_id), pf->stat_prev_loaded,
3059                          &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx);
3060
3061        ice_stat_update32(hw, GLPRT_LXONTXC(pf_id), pf->stat_prev_loaded,
3062                          &prev_ps->link_xon_tx, &cur_ps->link_xon_tx);
3063
3064        ice_stat_update32(hw, GLPRT_LXOFFTXC(pf_id), pf->stat_prev_loaded,
3065                          &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);
3066
3067        ice_stat_update32(hw, GLPRT_CRCERRS(pf_id), pf->stat_prev_loaded,
3068                          &prev_ps->crc_errors, &cur_ps->crc_errors);
3069
3070        ice_stat_update32(hw, GLPRT_ILLERRC(pf_id), pf->stat_prev_loaded,
3071                          &prev_ps->illegal_bytes, &cur_ps->illegal_bytes);
3072
3073        ice_stat_update32(hw, GLPRT_MLFC(pf_id), pf->stat_prev_loaded,
3074                          &prev_ps->mac_local_faults,
3075                          &cur_ps->mac_local_faults);
3076
3077        ice_stat_update32(hw, GLPRT_MRFC(pf_id), pf->stat_prev_loaded,
3078                          &prev_ps->mac_remote_faults,
3079                          &cur_ps->mac_remote_faults);
3080
3081        ice_stat_update32(hw, GLPRT_RLEC(pf_id), pf->stat_prev_loaded,
3082                          &prev_ps->rx_len_errors, &cur_ps->rx_len_errors);
3083
3084        ice_stat_update32(hw, GLPRT_RUC(pf_id), pf->stat_prev_loaded,
3085                          &prev_ps->rx_undersize, &cur_ps->rx_undersize);
3086
3087        ice_stat_update32(hw, GLPRT_RFC(pf_id), pf->stat_prev_loaded,
3088                          &prev_ps->rx_fragments, &cur_ps->rx_fragments);
3089
3090        ice_stat_update32(hw, GLPRT_ROC(pf_id), pf->stat_prev_loaded,
3091                          &prev_ps->rx_oversize, &cur_ps->rx_oversize);
3092
3093        ice_stat_update32(hw, GLPRT_RJC(pf_id), pf->stat_prev_loaded,
3094                          &prev_ps->rx_jabber, &cur_ps->rx_jabber);
3095
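        /* baselines captured; subsequent reads are reported relative to them */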
3096        pf->stat_prev_loaded = true;
3097}
3098
3099/**
3100 * ice_get_stats64 - get statistics for network device structure
3101 * @netdev: network interface device structure
3102 * @stats: main device statistics structure
3103 */
3104static
3105void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
3106{
3107        struct ice_netdev_priv *np = netdev_priv(netdev);
3108        struct rtnl_link_stats64 *vsi_stats;
3109        struct ice_vsi *vsi = np->vsi;
3110
3111        vsi_stats = &vsi->net_stats;
3112
3113        if (test_bit(__ICE_DOWN, vsi->state) || !vsi->num_txq || !vsi->num_rxq)
3114                return;
3115        /* netdev packet/byte stats come from the ring counters and are
3116         * obtained by summing them up (done by ice_update_vsi_ring_stats).
3117         */
3118        ice_update_vsi_ring_stats(vsi);
3119        stats->tx_packets = vsi_stats->tx_packets;
3120        stats->tx_bytes = vsi_stats->tx_bytes;
3121        stats->rx_packets = vsi_stats->rx_packets;
3122        stats->rx_bytes = vsi_stats->rx_bytes;
3123
3124        /* The rest of the stats could be read from hardware, but instead
3125         * we just return the values that the watchdog task has already
3126         * obtained from it.
3127         */
3128        stats->multicast = vsi_stats->multicast;
3129        stats->tx_errors = vsi_stats->tx_errors;
3130        stats->tx_dropped = vsi_stats->tx_dropped;
3131        stats->rx_errors = vsi_stats->rx_errors;
3132        stats->rx_dropped = vsi_stats->rx_dropped;
3133        stats->rx_crc_errors = vsi_stats->rx_crc_errors;
3134        stats->rx_length_errors = vsi_stats->rx_length_errors;
3135}
3136
3137/**
3138 * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
3139 * @vsi: VSI having NAPI disabled
3140 */
3141static void ice_napi_disable_all(struct ice_vsi *vsi)
3142{
3143        int q_idx;
3144
3145        if (!vsi->netdev)
3146                return;
3147
3148        for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
3149                struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
3150
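                /* NAPI is only active on vectors that own at least one ring */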
3151                if (q_vector->rx.ring || q_vector->tx.ring)
3152                        napi_disable(&q_vector->napi);
3153        }
3154}
3155
3156/**
3157 * ice_force_phys_link_state - Force the physical link state
3158 * @vsi: VSI to force the physical link state to up/down
3159 * @link_up: true to force the physical link up, false to force it down
3160 *
3161 * Force the physical link state by getting the current PHY capabilities from
3162 * hardware and setting the PHY config based on the determined capabilities. If the
3163 * link changes, a link event will be triggered because both the Enable Automatic
3164 * Link Update and LESM Enable bits are set when setting the PHY capabilities.
3165 *
3166 * Returns 0 on success, negative on failure
3167 */
3168static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
3169{
3170        struct ice_aqc_get_phy_caps_data *pcaps;
3171        struct ice_aqc_set_phy_cfg_data *cfg;
3172        struct ice_port_info *pi;
3173        struct device *dev;
3174        int retcode;
3175
3176        if (!vsi || !vsi->port_info || !vsi->back)
3177                return -EINVAL;
3178        if (vsi->type != ICE_VSI_PF)
3179                return 0;
3180
3181        dev = &vsi->back->pdev->dev;
3182
3183        pi = vsi->port_info;
3184
3185        pcaps = devm_kzalloc(dev, sizeof(*pcaps), GFP_KERNEL);
3186        if (!pcaps)
3187                return -ENOMEM;
3188
3189        retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
3190                                      NULL);
3191        if (retcode) {
3192                dev_err(dev,
3193                        "Failed to get phy capabilities, VSI %d error %d\n",
3194                        vsi->vsi_num, retcode);
3195                retcode = -EIO;
3196                goto out;
3197        }
3198
3199        /* No change in link */
3200        if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
3201            link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
3202                goto out;
3203
3204        cfg = devm_kzalloc(dev, sizeof(*cfg), GFP_KERNEL);
3205        if (!cfg) {
3206                retcode = -ENOMEM;
3207                goto out;
3208        }
3209
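        /* mirror the currently reported PHY configuration; beyond the
         * link-enable bit, only ICE_AQ_PHY_ENA_AUTO_LINK_UPDT is added, so
         * firmware raises a link event when the state flips
         */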
3210        cfg->phy_type_low = pcaps->phy_type_low;
3211        cfg->phy_type_high = pcaps->phy_type_high;
3212        cfg->caps = pcaps->caps | ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
3213        cfg->low_power_ctrl = pcaps->low_power_ctrl;
3214        cfg->eee_cap = pcaps->eee_cap;
3215        cfg->eeer_value = pcaps->eeer_value;
3216        cfg->link_fec_opt = pcaps->link_fec_options;
3217        if (link_up)
3218                cfg->caps |= ICE_AQ_PHY_ENA_LINK;
3219        else
3220                cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
3221
3222        retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi->lport, cfg, NULL);
3223        if (retcode) {
3224                dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
3225                        vsi->vsi_num, retcode);
3226                retcode = -EIO;
3227        }
3228
3229        devm_kfree(dev, cfg);
3230out:
3231        devm_kfree(dev, pcaps);
3232        return retcode;
3233}
3234
3235/**
3236 * ice_down - Shutdown the connection
3237 * @vsi: The VSI being stopped
3238 */
3239int ice_down(struct ice_vsi *vsi)
3240{
3241        int i, tx_err, rx_err, link_err = 0;
3242
3243        /* The caller of this function is expected to set the
3244         * __ICE_DOWN bit in vsi->state
3245         */
3246        if (vsi->netdev) {
3247                netif_carrier_off(vsi->netdev);
3248                netif_tx_disable(vsi->netdev);
3249        }
3250
3251        ice_vsi_dis_irq(vsi);
3252
3253        tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
3254        if (tx_err)
3255                netdev_err(vsi->netdev,
3256                           "Failed stop Tx rings, VSI %d error %d\n",
3257                           vsi->vsi_num, tx_err);
3258
3259        rx_err = ice_vsi_stop_rx_rings(vsi);
3260        if (rx_err)
3261                netdev_err(vsi->netdev,
3262                           "Failed stop Rx rings, VSI %d error %d\n",
3263                           vsi->vsi_num, rx_err);
3264
3265        ice_napi_disable_all(vsi);
3266
3267        if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
3268                link_err = ice_force_phys_link_state(vsi, false);
3269                if (link_err)
3270                        netdev_err(vsi->netdev,
3271                                   "Failed to set physical link down, VSI %d error %d\n",
3272                                   vsi->vsi_num, link_err);
3273        }
3274
3275        ice_for_each_txq(vsi, i)
3276                ice_clean_tx_ring(vsi->tx_rings[i]);
3277
3278        ice_for_each_rxq(vsi, i)
3279                ice_clean_rx_ring(vsi->rx_rings[i]);
3280
3281        if (tx_err || rx_err || link_err) {
3282                netdev_err(vsi->netdev,
3283                           "Failed to close VSI 0x%04X on switch 0x%04X\n",
3284                           vsi->vsi_num, vsi->vsw->sw_id);
3285                return -EIO;
3286        }
3287
3288        return 0;
3289}
3290
3291/**
3292 * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
3293 * @vsi: VSI having resources allocated
3294 *
3295 * Return 0 on success, negative on failure
3296 */
3297static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
3298{
3299        int i, err = 0;
3300
3301        if (!vsi->num_txq) {
3302                dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n",
3303                        vsi->vsi_num);
3304                return -EINVAL;
3305        }
3306
3307        ice_for_each_txq(vsi, i) {
3308                vsi->tx_rings[i]->netdev = vsi->netdev;
3309                err = ice_setup_tx_ring(vsi->tx_rings[i]);
3310                if (err)
3311                        break;
3312        }
3313
3314        return err;
3315}
3316
3317/**
3318 * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
3319 * @vsi: VSI having resources allocated
3320 *
3321 * Return 0 on success, negative on failure
3322 */
3323static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
3324{
3325        int i, err = 0;
3326
3327        if (!vsi->num_rxq) {
3328                dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n",
3329                        vsi->vsi_num);
3330                return -EINVAL;
3331        }
3332
3333        ice_for_each_rxq(vsi, i) {
3334                vsi->rx_rings[i]->netdev = vsi->netdev;
3335                err = ice_setup_rx_ring(vsi->rx_rings[i]);
3336                if (err)
3337                        break;
3338        }
3339
3340        return err;
3341}
3342
3343/**
3344 * ice_vsi_req_irq - Request IRQ from the OS
3345 * @vsi: The VSI IRQ is being requested for
3346 * @basename: name for the vector
3347 *
3348 * Return 0 on success and a negative value on error
3349 */
3350static int ice_vsi_req_irq(struct ice_vsi *vsi, char *basename)
3351{
3352        struct ice_pf *pf = vsi->back;
3353        int err = -EINVAL;
3354
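        /* per-queue interrupt vectors are only requested when MSI-X is in use */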
3355        if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
3356                err = ice_vsi_req_irq_msix(vsi, basename);
3357
3358        return err;
3359}
3360
3361/**
3362 * ice_vsi_open - Called when a network interface is made active
3363 * @vsi: the VSI to open
3364 *
3365 * Initialization of the VSI
3366 *
3367 * Returns 0 on success, negative value on error
3368 */
3369static int ice_vsi_open(struct ice_vsi *vsi)
3370{
3371        char int_name[ICE_INT_NAME_STR_LEN];
3372        struct ice_pf *pf = vsi->back;
3373        int err;
3374
3375        /* allocate descriptors */
3376        err = ice_vsi_setup_tx_rings(vsi);
3377        if (err)
3378                goto err_setup_tx;
3379
3380        err = ice_vsi_setup_rx_rings(vsi);
3381        if (err)
3382                goto err_setup_rx;
3383
3384        err = ice_vsi_cfg(vsi);
3385        if (err)
3386                goto err_setup_rx;
3387
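        /* build the interrupt vector base name, e.g. "ice-eth0" */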
3388        snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
3389                 dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
3390        err = ice_vsi_req_irq(vsi, int_name);
3391        if (err)
3392                goto err_setup_rx;
3393
3394        /* Notify the stack of the actual queue counts. */
3395        err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq);
3396        if (err)
3397                goto err_set_qs;
3398
3399        err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq);
3400        if (err)
3401                goto err_set_qs;
3402
3403        err = ice_up_complete(vsi);
3404        if (err)
3405                goto err_up_complete;
3406
3407        return 0;
3408
3409err_up_complete:
3410        ice_down(vsi);
3411err_set_qs:
3412        ice_vsi_free_irq(vsi);
3413err_setup_rx:
3414        ice_vsi_free_rx_rings(vsi);
3415err_setup_tx:
3416        ice_vsi_free_tx_rings(vsi);
3417
3418        return err;
3419}
3420
3421/**
3422 * ice_vsi_release_all - Delete all VSIs
3423 * @pf: PF from which all VSIs are being removed
3424 */
3425static void ice_vsi_release_all(struct ice_pf *pf)
3426{
3427        int err, i;
3428
3429        if (!pf->vsi)
3430                return;
3431
3432        for (i = 0; i < pf->num_alloc_vsi; i++) {
3433                if (!pf->vsi[i])
3434                        continue;
3435
3436                err = ice_vsi_release(pf->vsi[i]);
3437                if (err)
3438                        dev_dbg(&pf->pdev->dev,
3439                                "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
3440                                i, err, pf->vsi[i]->vsi_num);
3441        }
3442}
3443
3444/**
3445 * ice_dis_vsi - pause a VSI
3446 * @vsi: the VSI being paused
3447 * @locked: is the rtnl_lock already held
3448 */
3449static void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
3450{
3451        if (test_bit(__ICE_DOWN, vsi->state))
3452                return;
3453
3454        set_bit(__ICE_NEEDS_RESTART, vsi->state);
3455
3456        if (vsi->type == ICE_VSI_PF && vsi->netdev) {
3457                if (netif_running(vsi->netdev)) {
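                        /* ndo_stop must be called under rtnl_lock; take it
                         * here only when the caller does not already hold it
                         */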
3458                        if (!locked) {
3459                                rtnl_lock();
3460                                vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
3461                                rtnl_unlock();
3462                        } else {
3463                                vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
3464                        }
3465                } else {
3466                        ice_vsi_close(vsi);
3467                }
3468        }
3469}
3470
3471/**
3472 * ice_ena_vsi - resume a VSI
3473 * @vsi: the VSI being resumed
3474 */
3475static int ice_ena_vsi(struct ice_vsi *vsi)
3476{
3477        int err = 0;
3478
3479        if (test_and_clear_bit(__ICE_NEEDS_RESTART, vsi->state) &&
3480            vsi->netdev) {
3481                if (netif_running(vsi->netdev)) {
3482                        rtnl_lock();
3483                        err = vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
3484                        rtnl_unlock();
3485                } else {
3486                        err = ice_vsi_open(vsi);
3487                }
3488        }
3489
3490        return err;
3491}
3492
3493/**
3494 * ice_pf_dis_all_vsi - Pause all VSIs on a PF
3495 * @pf: the PF
3496 */
3497static void ice_pf_dis_all_vsi(struct ice_pf *pf)
3498{
3499        int v;
3500
3501        ice_for_each_vsi(pf, v)
3502                if (pf->vsi[v])
3503                        ice_dis_vsi(pf->vsi[v], false);
3504}
3505
3506/**
3507 * ice_pf_ena_all_vsi - Resume all VSIs on a PF
3508 * @pf: the PF
3509 */
3510static int ice_pf_ena_all_vsi(struct ice_pf *pf)
3511{
3512        int v;
3513
3514        ice_for_each_vsi(pf, v)
3515                if (pf->vsi[v])
3516                        if (ice_ena_vsi(pf->vsi[v]))
3517                                return -EIO;
3518
3519        return 0;
3520}
3521
3522/**
3523 * ice_vsi_rebuild_all - rebuild all VSIs in the PF
3524 * @pf: the PF
3525 */
3526static int ice_vsi_rebuild_all(struct ice_pf *pf)
3527{
3528        int i;
3529
3530        /* loop through the pf->vsi array and rebuild each VSI found */
3531        for (i = 0; i < pf->num_alloc_vsi; i++) {
3532                int err;
3533
3534                if (!pf->vsi[i])
3535                        continue;
3536
3537                /* VF VSI rebuild isn't supported yet */
3538                if (pf->vsi[i]->type == ICE_VSI_VF)
3539                        continue;
3540
3541                err = ice_vsi_rebuild(pf->vsi[i]);
3542                if (err) {
3543                        dev_err(&pf->pdev->dev,
3544                                "VSI at index %d rebuild failed\n",
3545                                pf->vsi[i]->idx);
3546                        return err;
3547                }
3548
3549                dev_info(&pf->pdev->dev,
3550                         "VSI at index %d rebuilt. vsi_num = 0x%x\n",
3551                         pf->vsi[i]->idx, pf->vsi[i]->vsi_num);
3552        }
3553
3554        return 0;
3555}
3556
3557/**
3558 * ice_vsi_replay_all - replay all VSIs configuration in the PF
3559 * @pf: the PF
3560 */
3561static int ice_vsi_replay_all(struct ice_pf *pf)
3562{
3563        struct ice_hw *hw = &pf->hw;
3564        enum ice_status ret;
3565        int i;
3566
3567        /* loop through the pf->vsi array and replay each VSI found */
3568        for (i = 0; i < pf->num_alloc_vsi; i++) {
3569                if (!pf->vsi[i])
3570                        continue;
3571
3572                ret = ice_replay_vsi(hw, pf->vsi[i]->idx);
3573                if (ret) {
3574                        dev_err(&pf->pdev->dev,
3575                                "VSI at index %d replay failed %d\n",
3576                                pf->vsi[i]->idx, ret);
3577                        return -EIO;
3578                }
3579
3580                /* Re-map the HW VSI number, using the VSI handle that was
3581                 * previously validated in the ice_replay_vsi() call above
3582                 */
3583                pf->vsi[i]->vsi_num = ice_get_hw_vsi_num(hw, pf->vsi[i]->idx);
3584
3585                dev_info(&pf->pdev->dev,
3586                         "VSI at index %d filter replayed successfully - vsi_num %i\n",
3587                         pf->vsi[i]->idx, pf->vsi[i]->vsi_num);
3588        }
3589
3590        /* Clean up replay filter after successful re-configuration */
3591        ice_replay_post(hw);
3592        return 0;
3593}
3594
3595/**
3596 * ice_rebuild - rebuild after reset
3597 * @pf: PF to rebuild
3598 */
3599static void ice_rebuild(struct ice_pf *pf)
3600{
3601        struct device *dev = &pf->pdev->dev;
3602        struct ice_hw *hw = &pf->hw;
3603        enum ice_status ret;
3604        int err, i;
3605
3606        if (test_bit(__ICE_DOWN, pf->state))
3607                goto clear_recovery;
3608
3609        dev_dbg(dev, "rebuilding pf\n");
3610
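        /* bring the device back up in dependency order: control queues first,
         * then device capabilities and the scheduler, then the VSIs, and
         * finally interrupts and link state
         */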
3611        ret = ice_init_all_ctrlq(hw);
3612        if (ret) {
3613                dev_err(dev, "control queues init failed %d\n", ret);
3614                goto err_init_ctrlq;
3615        }
3616
3617        ret = ice_clear_pf_cfg(hw);
3618        if (ret) {
3619                dev_err(dev, "clear PF configuration failed %d\n", ret);
3620                goto err_init_ctrlq;
3621        }
3622
3623        ice_clear_pxe_mode(hw);
3624
3625        ret = ice_get_caps(hw);
3626        if (ret) {
3627                dev_err(dev, "ice_get_caps failed %d\n", ret);
3628                goto err_init_ctrlq;
3629        }
3630
3631        err = ice_sched_init_port(hw->port_info);
3632        if (err)
3633                goto err_sched_init_port;
3634
3635        /* reset search_hint of irq_trackers to 0 since interrupts are
3636         * reclaimed and could be allocated from the beginning during VSI rebuild
3637         */
3638        pf->sw_irq_tracker->search_hint = 0;
3639        pf->hw_irq_tracker->search_hint = 0;
3640
3641        err = ice_vsi_rebuild_all(pf);
3642        if (err) {
3643                dev_err(dev, "ice_vsi_rebuild_all failed\n");
3644                goto err_vsi_rebuild;
3645        }
3646
3647        err = ice_update_link_info(hw->port_info);
3648        if (err)
3649                dev_err(&pf->pdev->dev, "Get link status error %d\n", err);
3650
3651        /* Replay all VSI configurations, including filters, after reset */
3652        if (ice_vsi_replay_all(pf)) {
3653                dev_err(&pf->pdev->dev,
3654                        "error replaying VSI configurations with switch filter rules\n");
3655                goto err_vsi_rebuild;
3656        }
3657
3658        /* start misc vector */
3659        if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
3660                err = ice_req_irq_msix_misc(pf);
3661                if (err) {
3662                        dev_err(dev, "misc vector setup failed: %d\n", err);
3663                        goto err_vsi_rebuild;
3664                }
3665        }
3666
3667        /* restart the VSIs that were rebuilt and running before the reset */
3668        err = ice_pf_ena_all_vsi(pf);
3669        if (err) {
3670                dev_err(&pf->pdev->dev, "error enabling VSIs\n");
3671                /* no need to disable VSIs in the teardown path of ice_rebuild()
3672                 * since it's already taken care of in ice_vsi_open()
3673                 */
3674                goto err_vsi_rebuild;
3675        }
3676
3677        ice_reset_all_vfs(pf, true);
3678
3679        for (i = 0; i < pf->num_alloc_vsi; i++) {
3680                bool link_up;
3681
3682                if (!pf->vsi[i] || pf->vsi[i]->type != ICE_VSI_PF)
3683                        continue;
3684                ice_get_link_status(pf->vsi[i]->port_info, &link_up);
3685                if (link_up) {
3686                        netif_carrier_on(pf->vsi[i]->netdev);
3687                        netif_tx_wake_all_queues(pf->vsi[i]->netdev);
3688                } else {
3689                        netif_carrier_off(pf->vsi[i]->netdev);
3690                        netif_tx_stop_all_queues(pf->vsi[i]->netdev);
3691                }
3692        }
3693
3694        /* if we get here, reset flow is successful */
3695        clear_bit(__ICE_RESET_FAILED, pf->state);
3696        return;
3697
3698err_vsi_rebuild:
3699        ice_vsi_release_all(pf);
3700err_sched_init_port:
3701        ice_sched_cleanup_all(hw);
3702err_init_ctrlq:
3703        ice_shutdown_all_ctrlq(hw);
3704        set_bit(__ICE_RESET_FAILED, pf->state);
3705clear_recovery:
3706        /* set this bit in PF state to control service task scheduling */
3707        set_bit(__ICE_NEEDS_RESTART, pf->state);
3708        dev_err(dev, "Rebuild failed, unload and reload driver\n");
3709}
3710
3711/**
3712 * ice_change_mtu - NDO callback to change the MTU
3713 * @netdev: network interface device structure
3714 * @new_mtu: new value for maximum frame size
3715 *
3716 * Returns 0 on success, negative on failure
3717 */
3718static int ice_change_mtu(struct net_device *netdev, int new_mtu)
3719{
3720        struct ice_netdev_priv *np = netdev_priv(netdev);
3721        struct ice_vsi *vsi = np->vsi;
3722        struct ice_pf *pf = vsi->back;
3723        u8 count = 0;
3724
3725        if (new_mtu == netdev->mtu) {
3726                netdev_warn(netdev, "mtu is already %u\n", netdev->mtu);
3727                return 0;
3728        }
3729
3730        if (new_mtu < netdev->extended->min_mtu) {
3731                netdev_err(netdev, "new mtu invalid. min_mtu is %d\n",
3732                           netdev->extended->min_mtu);
3733                return -EINVAL;
3734        } else if (new_mtu > netdev->extended->max_mtu) {
3735                netdev_err(netdev, "new mtu invalid. max_mtu is %d\n",
3736                           netdev->extended->max_mtu);
3737                return -EINVAL;
3738        }
3739        /* if a reset is in progress, wait for some time for it to complete */
3740        do {
3741                if (ice_is_reset_in_progress(pf->state)) {
3742                        count++;
3743                        usleep_range(1000, 2000);
3744                } else {
3745                        break;
3746                }
3747
3748        } while (count < 100);
3749
3750        if (count == 100) {
3751                netdev_err(netdev, "can't change mtu. Device is busy\n");
3752                return -EBUSY;
3753        }
3754
3755        netdev->mtu = new_mtu;
3756
3757        /* if VSI is up, bring it down and then back up */
3758        if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
3759                int err;
3760
3761                err = ice_down(vsi);
3762                if (err) {
3763                        netdev_err(netdev, "change mtu if_down err %d\n", err);
3764                        return err;
3765                }
3766
3767                err = ice_up(vsi);
3768                if (err) {
3769                        netdev_err(netdev, "change mtu if_up err %d\n", err);
3770                        return err;
3771                }
3772        }
3773
3774        netdev_dbg(netdev, "changed mtu to %d\n", new_mtu);
3775        return 0;
3776}
3777
3778/**
3779 * ice_set_rss - Set RSS keys and lut
3780 * @vsi: Pointer to VSI structure
3781 * @seed: RSS hash seed
3782 * @lut: Lookup table
3783 * @lut_size: Lookup table size
3784 *
3785 * Returns 0 on success, negative on failure
3786 */
3787int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
3788{
3789        struct ice_pf *pf = vsi->back;
3790        struct ice_hw *hw = &pf->hw;
3791        enum ice_status status;
3792
3793        if (seed) {
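                /* the caller's seed buffer is laid out exactly like the
                 * admin queue set-RSS-key structure, so cast it directly
                 */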
3794                struct ice_aqc_get_set_rss_keys *buf =
3795                                  (struct ice_aqc_get_set_rss_keys *)seed;
3796
3797                status = ice_aq_set_rss_key(hw, vsi->idx, buf);
3798
3799                if (status) {
3800                        dev_err(&pf->pdev->dev,
3801                                "Cannot set RSS key, err %d aq_err %d\n",
3802                                status, hw->adminq.rq_last_status);
3803                        return -EIO;
3804                }
3805        }
3806
3807        if (lut) {
3808                status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
3809                                            lut, lut_size);
3810                if (status) {
3811                        dev_err(&pf->pdev->dev,
3812                                "Cannot set RSS lut, err %d aq_err %d\n",
3813                                status, hw->adminq.rq_last_status);
3814                        return -EIO;
3815                }
3816        }
3817
3818        return 0;
3819}
3820
3821/**
3822 * ice_get_rss - Get RSS keys and lut
3823 * @vsi: Pointer to VSI structure
3824 * @seed: Buffer to store the keys
3825 * @lut: Buffer to store the lookup table entries
3826 * @lut_size: Size of buffer to store the lookup table entries
3827 *
3828 * Returns 0 on success, negative on failure
3829 */
3830int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
3831{
3832        struct ice_pf *pf = vsi->back;
3833        struct ice_hw *hw = &pf->hw;
3834        enum ice_status status;
3835
3836        if (seed) {
3837                struct ice_aqc_get_set_rss_keys *buf =
3838                                  (struct ice_aqc_get_set_rss_keys *)seed;
3839
3840                status = ice_aq_get_rss_key(hw, vsi->idx, buf);
3841                if (status) {
3842                        dev_err(&pf->pdev->dev,
3843                                "Cannot get RSS key, err %d aq_err %d\n",
3844                                status, hw->adminq.rq_last_status);
3845                        return -EIO;
3846                }
3847        }
3848
3849        if (lut) {
3850                status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
3851                                            lut, lut_size);
3852                if (status) {
3853                        dev_err(&pf->pdev->dev,
3854                                "Cannot get RSS lut, err %d aq_err %d\n",
3855                                status, hw->adminq.rq_last_status);
3856                        return -EIO;
3857                }
3858        }
3859
3860        return 0;
3861}
3862
3863/**
3864 * ice_bridge_getlink - Get the hardware bridge mode
3865 * @skb: skb buff
3866 * @pid: process id
3867 * @seq: RTNL message seq
3868 * @dev: the netdev being configured
3869 * @filter_mask: filter mask passed in
3870 * @nlflags: netlink flags passed in
3871 *
3872 * Return the bridge mode (VEB/VEPA)
3873 */
3874static int
3875ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
3876                   struct net_device *dev, u32 filter_mask, int nlflags)
3877{
3878        struct ice_netdev_priv *np = netdev_priv(dev);
3879        struct ice_vsi *vsi = np->vsi;
3880        struct ice_pf *pf = vsi->back;
3881        u16 bmode;
3882
3883        bmode = pf->first_sw->bridge_mode;
3884
3885        return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bmode, 0, 0, nlflags,
3886                                       filter_mask, NULL);
3887}
3888
3889/**
3890 * ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
3891 * @vsi: Pointer to VSI structure
3892 * @bmode: Hardware bridge mode (VEB/VEPA)
3893 *
3894 * Returns 0 on success, negative on failure
3895 */
3896static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
3897{
3898        struct device *dev = &vsi->back->pdev->dev;
3899        struct ice_aqc_vsi_props *vsi_props;
3900        struct ice_hw *hw = &vsi->back->hw;
3901        struct ice_vsi_ctx *ctxt;
3902        enum ice_status status;
3903        int ret = 0;
3904
3905        vsi_props = &vsi->info;
3906
3907        ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
3908        if (!ctxt)
3909                return -ENOMEM;
3910
3911        ctxt->info = vsi->info;
3912
3913        if (bmode == BRIDGE_MODE_VEB)
3914                /* change from VEPA to VEB mode */
3915                ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
3916        else
3917                /* change from VEB to VEPA mode */
3918                ctxt->info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
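        /* mark only the switch section of the context as valid for update */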
3919        ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
3920
3921        status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
3922        if (status) {
3923                dev_err(dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n",
3924                        bmode, status, hw->adminq.sq_last_status);
3925                ret = -EIO;
3926                goto out;
3927        }
3928        /* Update sw flags for bookkeeping */
3929        vsi_props->sw_flags = ctxt->info.sw_flags;
3930
3931out:
3932        devm_kfree(dev, ctxt);
3933        return ret;
3934}
3935
3936/**
3937 * ice_bridge_setlink - Set the hardware bridge mode
3938 * @dev: the netdev being configured
3939 * @nlh: RTNL message
3940 * @flags: bridge setlink flags
3941 *
3942 * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
3943 * hooked up. Iterates through the PF VSI list and sets the loopback mode (if
3944 * not already set) for all VSIs connected to this switch. Also updates the
3945 * unicast switch filter rules for the corresponding switch of the netdev.
3946 */
3947static int
3948ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
3949                   u16 __always_unused flags)
3950{
3951        struct ice_netdev_priv *np = netdev_priv(dev);
3952        struct ice_pf *pf = np->vsi->back;
3953        struct nlattr *attr, *br_spec;
3954        struct ice_hw *hw = &pf->hw;
3955        enum ice_status status;
3956        struct ice_sw *pf_sw;
3957        int rem, v, err = 0;
3958
3959        pf_sw = pf->first_sw;
3960        /* find the attribute in the netlink message */
3961        br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
        if (!br_spec)
                return -EINVAL;
3962
3963        nla_for_each_nested(attr, br_spec, rem) {
3964                __u16 mode;
3965
3966                if (nla_type(attr) != IFLA_BRIDGE_MODE)
3967                        continue;
3968                mode = nla_get_u16(attr);
3969                if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
3970                        return -EINVAL;
3971                /* Continue if bridge mode is not being flipped */
3972                if (mode == pf_sw->bridge_mode)
3973                        continue;
3974                /* Iterate through the PF VSI list and update the loopback
3975                 * mode of each VSI
3976                 */
3977                ice_for_each_vsi(pf, v) {
3978                        if (!pf->vsi[v])
3979                                continue;
3980                        err = ice_vsi_update_bridge_mode(pf->vsi[v], mode);
3981                        if (err)
3982                                return err;
3983                }
3984
3985                hw->evb_veb = (mode == BRIDGE_MODE_VEB);
3986                /* Update the unicast switch filter rules for the corresponding
3987                 * switch of the netdev
3988                 */
3989                status = ice_update_sw_rule_bridge_mode(hw);
3990                if (status) {
3991                        netdev_err(dev, "switch rule update failed, mode = %d err %d aq_err %d\n",
3992                                   mode, status, hw->adminq.sq_last_status);
3993                        /* revert hw->evb_veb */
3994                        hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB);
3995                        return -EIO;
3996                }
3997
3998                pf_sw->bridge_mode = mode;
3999        }
4000
4001        return 0;
4002}
4003
4004/**
4005 * ice_tx_timeout - Respond to a Tx Hang
4006 * @netdev: network interface device structure
4007 */
4008static void ice_tx_timeout(struct net_device *netdev)
4009{
4010        struct ice_netdev_priv *np = netdev_priv(netdev);
4011        struct ice_ring *tx_ring = NULL;
4012        struct ice_vsi *vsi = np->vsi;
4013        struct ice_pf *pf = vsi->back;
4014        int hung_queue = -1;
4015        u32 i;
4016
4017        pf->tx_timeout_count++;
4018
4019        /* find the stopped queue the same way dev_watchdog() does */
4020        for (i = 0; i < netdev->num_tx_queues; i++) {
4021                unsigned long trans_start;
4022                struct netdev_queue *q;
4023
4024                q = netdev_get_tx_queue(netdev, i);
4025                trans_start = q->trans_start;
4026                if (netif_xmit_stopped(q) &&
4027                    time_after(jiffies,
4028                               trans_start + netdev->watchdog_timeo)) {
4029                        hung_queue = i;
4030                        break;
4031                }
4032        }
4033
4034        if (i == netdev->num_tx_queues)
4035                netdev_info(netdev, "tx_timeout: no netdev hung queue found\n");
4036        else
4037                /* now that we have an index, find the tx_ring struct */
4038                for (i = 0; i < vsi->num_txq; i++)
4039                        if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
4040                                if (hung_queue == vsi->tx_rings[i]->q_index) {
4041                                        tx_ring = vsi->tx_rings[i];
4042                                        break;
4043                                }
4044
4045        /* Reset recovery level if enough time has elapsed after last timeout.
4046         * Also ensure no new reset action happens before next timeout period.
4047         */
4048        if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * 20)))
4049                pf->tx_timeout_recovery_level = 1;
4050        else if (time_before(jiffies, (pf->tx_timeout_last_recovery +
4051                                       netdev->watchdog_timeo)))
4052                return;
4053
4054        if (tx_ring) {
4055                struct ice_hw *hw = &pf->hw;
4056                u32 head, val = 0;
4057
4058                head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[hung_queue])) &
4059                        QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S;
4060                /* Read interrupt register */
4061                if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
4062                        val = rd32(hw,
4063                                   GLINT_DYN_CTL(tx_ring->q_vector->v_idx +
4064                                                 tx_ring->vsi->hw_base_vector));
4065
4066                netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %d, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
4067                            vsi->vsi_num, hung_queue, tx_ring->next_to_clean,
4068                            head, tx_ring->next_to_use, val);
4069        }
4070
4071        pf->tx_timeout_last_recovery = jiffies;
4072        netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n",
4073                    pf->tx_timeout_recovery_level, hung_queue);
4074
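        /* escalate recovery with each successive timeout: PF reset first,
         * then core reset, then global reset, then give up and go down
         */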
4075        switch (pf->tx_timeout_recovery_level) {
4076        case 1:
4077                set_bit(__ICE_PFR_REQ, pf->state);
4078                break;
4079        case 2:
4080                set_bit(__ICE_CORER_REQ, pf->state);
4081                break;
4082        case 3:
4083                set_bit(__ICE_GLOBR_REQ, pf->state);
4084                break;
4085        default:
4086                netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
4087                set_bit(__ICE_DOWN, pf->state);
4088                set_bit(__ICE_NEEDS_RESTART, vsi->state);
4089                set_bit(__ICE_SERVICE_DIS, pf->state);
4090                break;
4091        }
4092
4093        ice_service_task_schedule(pf);
4094        pf->tx_timeout_recovery_level++;
4095}
4096
4097/**
4098 * ice_open - Called when a network interface becomes active
4099 * @netdev: network interface device structure
4100 *
4101 * The open entry point is called when a network interface is made
4102 * active by the system (IFF_UP). At this point all resources needed
4103 * for transmit and receive operations are allocated, the interrupt
4104 * handler is registered with the OS, the netdev watchdog is enabled,
4105 * and the stack is notified that the interface is ready.
4106 *
4107 * Returns 0 on success, negative value on failure
4108 */
4109static int ice_open(struct net_device *netdev)
4110{
4111        struct ice_netdev_priv *np = netdev_priv(netdev);
4112        struct ice_vsi *vsi = np->vsi;
4113        int err;
4114
4115        if (test_bit(__ICE_NEEDS_RESTART, vsi->back->state)) {
4116                netdev_err(netdev, "driver needs to be unloaded and reloaded\n");
4117                return -EIO;
4118        }
4119
4120        netif_carrier_off(netdev);
4121
4122        err = ice_force_phys_link_state(vsi, true);
4123        if (err) {
4124                netdev_err(netdev,
4125                           "Failed to set physical link up, error %d\n", err);
4126                return err;
4127        }
4128
4129        err = ice_vsi_open(vsi);
4130        if (err)
4131                netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
4132                           vsi->vsi_num, vsi->vsw->sw_id);
4133        return err;
4134}
4135
4136/**
4137 * ice_stop - Disables a network interface
4138 * @netdev: network interface device structure
4139 *
4140 * The stop entry point is called when an interface is de-activated by the OS,
4141 * and the netdevice enters the DOWN state. The hardware is still under the
4142 * driver's control, but the netdev interface is disabled.
4143 *
4144 * Returns success only - not allowed to fail
4145 */
4146static int ice_stop(struct net_device *netdev)
4147{
4148        struct ice_netdev_priv *np = netdev_priv(netdev);
4149        struct ice_vsi *vsi = np->vsi;
4150
4151        ice_vsi_close(vsi);
4152
4153        return 0;
4154}
4155
4156/**
4157 * ice_features_check - Validate encapsulated packet conforms to limits
4158 * @skb: skb buffer
4159 * @netdev: This port's netdev
4160 * @features: Offload features that the stack believes apply
4161 */
4162static netdev_features_t
4163ice_features_check(struct sk_buff *skb,
4164                   struct net_device __always_unused *netdev,
4165                   netdev_features_t features)
4166{
4167        size_t len;
4168
4169        /* No point in doing any of this if neither checksum nor GSO are
4170         * being requested for this frame. We can rule out both by just
4171         * checking for CHECKSUM_PARTIAL
4172         */
4173        if (skb->ip_summed != CHECKSUM_PARTIAL)
4174                return features;
4175
4176        /* We cannot support GSO if the MSS is going to be less than
4177         * 64 bytes. If it is, then we need to drop support for GSO.
4178         */
4179        if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
4180                features &= ~NETIF_F_GSO_MASK;
4181
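        /* each header length below must fit in its Tx descriptor field; any
         * bits outside the ICE_TXD_*LEN_MAX masks exceed what the hardware
         * can express, so the offloads are disabled for this frame
         */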
4182        len = skb_network_header(skb) - skb->data;
4183        if (len & ~(ICE_TXD_MACLEN_MAX))
4184                goto out_rm_features;
4185
4186        len = skb_transport_header(skb) - skb_network_header(skb);
4187        if (len & ~(ICE_TXD_IPLEN_MAX))
4188                goto out_rm_features;
4189
4190        if (skb->encapsulation) {
4191                len = skb_inner_network_header(skb) - skb_transport_header(skb);
4192                if (len & ~(ICE_TXD_L4LEN_MAX))
4193                        goto out_rm_features;
4194
4195                len = skb_inner_transport_header(skb) -
4196                      skb_inner_network_header(skb);
4197                if (len & ~(ICE_TXD_IPLEN_MAX))
4198                        goto out_rm_features;
4199        }
4200
4201        return features;
4202out_rm_features:
4203        return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4204}
4205
4206static const struct net_device_ops ice_netdev_ops = {
4207        .ndo_size = sizeof(struct net_device_ops),
4208        .ndo_open = ice_open,
4209        .ndo_stop = ice_stop,
4210        .ndo_start_xmit = ice_start_xmit,
4211        .ndo_features_check = ice_features_check,
4212        .ndo_set_rx_mode = ice_set_rx_mode,
4213        .ndo_set_mac_address = ice_set_mac_address,
4214        .ndo_validate_addr = eth_validate_addr,
4215        .extended.ndo_change_mtu = ice_change_mtu,
4216        .ndo_get_stats64 = ice_get_stats64,
4217        .ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
4218        .ndo_set_vf_mac = ice_set_vf_mac,
4219        .ndo_get_vf_config = ice_get_vf_cfg,
4220        .extended.ndo_set_vf_trust = ice_set_vf_trust,
4221        .extended.ndo_set_vf_vlan = ice_set_vf_port_vlan,
4222        .ndo_set_vf_link_state = ice_set_vf_link_state,
4223        .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
4224        .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
4225        .ndo_set_features = ice_set_features,
4226        .ndo_bridge_getlink = ice_bridge_getlink,
4227        .ndo_bridge_setlink = ice_bridge_setlink,
4228        .ndo_fdb_add = ice_fdb_add,
4229        .ndo_fdb_del = ice_fdb_del,
4230        .ndo_tx_timeout = ice_tx_timeout,
4231};
4232