linux/drivers/net/ethernet/amazon/ena/ena_netdev.c
/*
 * Copyright 2015 Amazon.com, Inc. or its affiliates.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif /* CONFIG_RFS_ACCEL */
#include <linux/ethtool.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/numa.h>
#include <linux/pci.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <linux/vmalloc.h>
#include <net/ip.h>

#include "ena_netdev.h"
#include <linux/bpf_trace.h>
#include "ena_pci_id_tbl.h"

MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
MODULE_DESCRIPTION(DEVICE_NAME);
MODULE_LICENSE("GPL");

/* Time in jiffies before concluding the transmitter is hung. */
#define TX_TIMEOUT  (5 * HZ)

#define ENA_NAPI_BUDGET 64

#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
                NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");

static struct ena_aenq_handlers aenq_handlers;

static struct workqueue_struct *ena_wq;

MODULE_DEVICE_TABLE(pci, ena_pci_tbl);

static int ena_rss_init_default(struct ena_adapter *adapter);
static void check_for_admin_com_state(struct ena_adapter *adapter);
static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
static int ena_restore_device(struct ena_adapter *adapter);

static void ena_init_io_rings(struct ena_adapter *adapter,
                              int first_index, int count);
static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
                                   int count);
static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
                                  int count);
static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
                                           int first_index,
                                           int count);
static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
static void ena_napi_disable_in_range(struct ena_adapter *adapter,
                                      int first_index, int count);
static void ena_napi_enable_in_range(struct ena_adapter *adapter,
                                     int first_index, int count);
static int ena_up(struct ena_adapter *adapter);
static void ena_down(struct ena_adapter *adapter);
static void ena_unmask_interrupt(struct ena_ring *tx_ring,
                                 struct ena_ring *rx_ring);
static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
                                      struct ena_ring *rx_ring);
static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
                              struct ena_tx_buffer *tx_info);
static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
                                            int first_index, int count);

static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
        struct ena_adapter *adapter = netdev_priv(dev);

        /* Change the state of the device to trigger reset.
         * Check that we are not in the middle of a reset and that a reset
         * has not already been triggered.
         */

        if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
                return;

        adapter->reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
        u64_stats_update_begin(&adapter->syncp);
        adapter->dev_stats.tx_timeout++;
        u64_stats_update_end(&adapter->syncp);

        netif_err(adapter, tx_err, dev, "Transmit timeout\n");
}

static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
{
        int i;

        for (i = 0; i < adapter->num_io_queues; i++)
                adapter->rx_ring[i].mtu = mtu;
}

static int ena_change_mtu(struct net_device *dev, int new_mtu)
{
        struct ena_adapter *adapter = netdev_priv(dev);
        int ret;

        ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
        if (!ret) {
                netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
                update_rx_ring_mtu(adapter, new_mtu);
                dev->mtu = new_mtu;
        } else {
                netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
                          new_mtu);
        }

        return ret;
}

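/* ena_xmit_common - push a packet's descriptors to the device
 *
 * Shared by the skb and XDP transmit paths: rings the doorbell first if the
 * LLQ burst limit was reached, calls ena_com_prepare_tx() to queue the
 * descriptors, updates the Tx statistics and advances next_to_use. Any
 * ena_com_prepare_tx() failure other than -ENOMEM schedules a device reset.
 */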
static int ena_xmit_common(struct net_device *dev,
                           struct ena_ring *ring,
                           struct ena_tx_buffer *tx_info,
                           struct ena_com_tx_ctx *ena_tx_ctx,
                           u16 next_to_use,
                           u32 bytes)
{
        struct ena_adapter *adapter = netdev_priv(dev);
        int rc, nb_hw_desc;

        if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
                                                ena_tx_ctx))) {
                netif_dbg(adapter, tx_queued, dev,
                          "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
                          ring->qid);
                ena_com_write_sq_doorbell(ring->ena_com_io_sq);
        }

        /* prepare the packet's descriptors for the DMA engine */
        rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
                                &nb_hw_desc);

        /* In case there isn't enough space in the queue for the packet,
         * we simply drop it. All other failure reasons of
         * ena_com_prepare_tx() are fatal and therefore require a device reset.
         */
        if (unlikely(rc)) {
                netif_err(adapter, tx_queued, dev,
                          "failed to prepare tx bufs\n");
                u64_stats_update_begin(&ring->syncp);
                ring->tx_stats.prepare_ctx_err++;
                u64_stats_update_end(&ring->syncp);
                if (rc != -ENOMEM) {
                        adapter->reset_reason =
                                ENA_REGS_RESET_DRIVER_INVALID_STATE;
                        set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
                }
                return rc;
        }

        u64_stats_update_begin(&ring->syncp);
        ring->tx_stats.cnt++;
        ring->tx_stats.bytes += bytes;
        u64_stats_update_end(&ring->syncp);

        tx_info->tx_descs = nb_hw_desc;
        tx_info->last_jiffies = jiffies;
        tx_info->print_once = 0;

        ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
                                                 ring->ring_size);
        return 0;
}

/* This is the XDP napi callback. XDP queues use a napi callback separate
 * from the one used by the Rx/Tx queues.
 */
static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
{
        struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
        u32 xdp_work_done, xdp_budget;
        struct ena_ring *xdp_ring;
        int napi_comp_call = 0;
        int ret;

        xdp_ring = ena_napi->xdp_ring;
        xdp_ring->first_interrupt = ena_napi->first_interrupt;

        xdp_budget = budget;

        if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
            test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
                napi_complete_done(napi, 0);
                return 0;
        }

        xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);

        /* If the device is about to reset or down, avoid unmasking
         * the interrupt and return 0 so NAPI won't reschedule
         */
        if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
                napi_complete_done(napi, 0);
                ret = 0;
        } else if (xdp_budget > xdp_work_done) {
                napi_comp_call = 1;
                if (napi_complete_done(napi, xdp_work_done))
                        ena_unmask_interrupt(xdp_ring, NULL);
                ena_update_ring_numa_node(xdp_ring, NULL);
                ret = xdp_work_done;
        } else {
                ret = xdp_budget;
        }

        u64_stats_update_begin(&xdp_ring->syncp);
        xdp_ring->tx_stats.napi_comp += napi_comp_call;
        xdp_ring->tx_stats.tx_poll++;
        u64_stats_update_end(&xdp_ring->syncp);

        return ret;
}

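/* Map an XDP frame for transmission: up to tx_max_header_size bytes are
 * passed to the device as an LLQ push header, and any remainder is DMA
 * mapped as a single buffer.
 */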
static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring,
                               struct ena_tx_buffer *tx_info,
                               struct xdp_buff *xdp,
                               void **push_hdr,
                               u32 *push_len)
{
        struct ena_adapter *adapter = xdp_ring->adapter;
        struct ena_com_buf *ena_buf;
        dma_addr_t dma = 0;
        u32 size;

        tx_info->xdpf = xdp_convert_buff_to_frame(xdp);
        size = tx_info->xdpf->len;
        ena_buf = tx_info->bufs;

        /* llq push buffer */
        *push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
        *push_hdr = tx_info->xdpf->data;

        if (size - *push_len > 0) {
                dma = dma_map_single(xdp_ring->dev,
                                     *push_hdr + *push_len,
                                     size - *push_len,
                                     DMA_TO_DEVICE);
                if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
                        goto error_report_dma_error;

                tx_info->map_linear_data = 1;
                tx_info->num_of_bufs = 1;
        }

        ena_buf->paddr = dma;
        ena_buf->len = size;

        return 0;

error_report_dma_error:
        u64_stats_update_begin(&xdp_ring->syncp);
        xdp_ring->tx_stats.dma_mapping_err++;
        u64_stats_update_end(&xdp_ring->syncp);
        netdev_warn(adapter->netdev, "failed to map xdp buff\n");

        xdp_return_frame_rx_napi(tx_info->xdpf);
        tx_info->xdpf = NULL;
        tx_info->num_of_bufs = 0;

        return -EINVAL;
}

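/* Transmit an XDP_TX frame on the XDP Tx ring given by @qid. The Rx page is
 * reference-counted and remembered in tx_info so it can be released once the
 * transmission completes.
 */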
static int ena_xdp_xmit_buff(struct net_device *dev,
                             struct xdp_buff *xdp,
                             int qid,
                             struct ena_rx_buffer *rx_info)
{
        struct ena_adapter *adapter = netdev_priv(dev);
        struct ena_com_tx_ctx ena_tx_ctx = {0};
        struct ena_tx_buffer *tx_info;
        struct ena_ring *xdp_ring;
        u16 next_to_use, req_id;
        int rc;
        void *push_hdr;
        u32 push_len;

        xdp_ring = &adapter->tx_ring[qid];
        next_to_use = xdp_ring->next_to_use;
        req_id = xdp_ring->free_ids[next_to_use];
        tx_info = &xdp_ring->tx_buffer_info[req_id];
        tx_info->num_of_bufs = 0;
        page_ref_inc(rx_info->page);
        tx_info->xdp_rx_page = rx_info->page;

        rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len);
        if (unlikely(rc))
                goto error_drop_packet;

        ena_tx_ctx.ena_bufs = tx_info->bufs;
        ena_tx_ctx.push_header = push_hdr;
        ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
        ena_tx_ctx.req_id = req_id;
        ena_tx_ctx.header_len = push_len;

        rc = ena_xmit_common(dev,
                             xdp_ring,
                             tx_info,
                             &ena_tx_ctx,
                             next_to_use,
                             xdp->data_end - xdp->data);
        if (rc)
                goto error_unmap_dma;
        /* trigger the dma engine. ena_com_write_sq_doorbell()
         * has a memory barrier
         */
        ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
        u64_stats_update_begin(&xdp_ring->syncp);
        xdp_ring->tx_stats.doorbells++;
        u64_stats_update_end(&xdp_ring->syncp);

        return NETDEV_TX_OK;

error_unmap_dma:
        ena_unmap_tx_buff(xdp_ring, tx_info);
        tx_info->xdpf = NULL;
error_drop_packet:
        __free_page(tx_info->xdp_rx_page);
        return NETDEV_TX_OK;
}

static int ena_xdp_execute(struct ena_ring *rx_ring,
                           struct xdp_buff *xdp,
                           struct ena_rx_buffer *rx_info)
{
        struct bpf_prog *xdp_prog;
        u32 verdict = XDP_PASS;

        rcu_read_lock();
        xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);

        if (!xdp_prog)
                goto out;

        verdict = bpf_prog_run_xdp(xdp_prog, xdp);

        if (verdict == XDP_TX)
                ena_xdp_xmit_buff(rx_ring->netdev,
                                  xdp,
                                  rx_ring->qid + rx_ring->adapter->num_io_queues,
                                  rx_info);
        else if (unlikely(verdict == XDP_ABORTED))
                trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
        else if (unlikely(verdict > XDP_TX))
                bpf_warn_invalid_xdp_action(verdict);
out:
        rcu_read_unlock();
        return verdict;
}

static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
{
        adapter->xdp_first_ring = adapter->num_io_queues;
        adapter->xdp_num_queues = adapter->num_io_queues;

        ena_init_io_rings(adapter,
                          adapter->xdp_first_ring,
                          adapter->xdp_num_queues);
}

static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
{
        int rc = 0;

        rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring,
                                             adapter->xdp_num_queues);
        if (rc)
                goto setup_err;

        rc = ena_create_io_tx_queues_in_range(adapter,
                                              adapter->xdp_first_ring,
                                              adapter->xdp_num_queues);
        if (rc)
                goto create_err;

        return 0;

create_err:
        ena_free_all_io_tx_resources(adapter);
setup_err:
        return rc;
}

/* Provides a way for both the kernel and the bpf program to know
 * more about the RX-queue a given XDP frame arrived on.
 */
static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
{
        int rc;

        rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid);

        if (rc) {
                netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
                          "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
                          rx_ring->qid, rc);
                goto err;
        }

        rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
                                        NULL);

        if (rc) {
                netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
                          "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
                          rx_ring->qid, rc);
                xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
        }

err:
        return rc;
}

static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
{
        xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
        xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
}

static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
                                                 struct bpf_prog *prog,
                                                 int first, int count)
{
        struct ena_ring *rx_ring;
        int i = 0;

        for (i = first; i < count; i++) {
                rx_ring = &adapter->rx_ring[i];
                xchg(&rx_ring->xdp_bpf_prog, prog);
                if (prog) {
                        ena_xdp_register_rxq_info(rx_ring);
                        rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
                } else {
                        ena_xdp_unregister_rxq_info(rx_ring);
                        rx_ring->rx_headroom = 0;
                }
        }
}

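/* Install @prog (or NULL to detach) on the netdev and on all Rx queues,
 * then release the previously attached program, if any.
 */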
static void ena_xdp_exchange_program(struct ena_adapter *adapter,
                                     struct bpf_prog *prog)
{
        struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);

        ena_xdp_exchange_program_rx_in_range(adapter,
                                             prog,
                                             0,
                                             adapter->num_io_queues);

        if (old_bpf_prog)
                bpf_prog_put(old_bpf_prog);
}

static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
{
        bool was_up;
        int rc;

        was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);

        if (was_up)
                ena_down(adapter);

        adapter->xdp_first_ring = 0;
        adapter->xdp_num_queues = 0;
        ena_xdp_exchange_program(adapter, NULL);
        if (was_up) {
                rc = ena_up(adapter);
                if (rc)
                        return rc;
        }
        return 0;
}

static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
{
        struct ena_adapter *adapter = netdev_priv(netdev);
        struct bpf_prog *prog = bpf->prog;
        struct bpf_prog *old_bpf_prog;
        int rc, prev_mtu;
        bool is_up;

        is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
        rc = ena_xdp_allowed(adapter);
        if (rc == ENA_XDP_ALLOWED) {
                old_bpf_prog = adapter->xdp_bpf_prog;
                if (prog) {
                        if (!is_up) {
                                ena_init_all_xdp_queues(adapter);
                        } else if (!old_bpf_prog) {
                                ena_down(adapter);
                                ena_init_all_xdp_queues(adapter);
                        }
                        ena_xdp_exchange_program(adapter, prog);

                        if (is_up && !old_bpf_prog) {
                                rc = ena_up(adapter);
                                if (rc)
                                        return rc;
                        }
                } else if (old_bpf_prog) {
                        rc = ena_destroy_and_free_all_xdp_queues(adapter);
                        if (rc)
                                return rc;
                }

                prev_mtu = netdev->max_mtu;
                netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;

                if (!old_bpf_prog)
                        netif_info(adapter, drv, adapter->netdev,
                                   "xdp program set, changing the max_mtu from %d to %d",
                                   prev_mtu, netdev->max_mtu);

        } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
                netif_err(adapter, drv, adapter->netdev,
                          "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
                          netdev->mtu, ENA_XDP_MAX_MTU);
                NL_SET_ERR_MSG_MOD(bpf->extack,
                                   "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
                return -EINVAL;
        } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
                netif_err(adapter, drv, adapter->netdev,
                          "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
                          adapter->num_io_queues, adapter->max_num_io_queues);
                NL_SET_ERR_MSG_MOD(bpf->extack,
                                   "Failed to set xdp program, there is not enough space for allocating XDP queues. Check the dmesg for more info");
                return -EINVAL;
        }

        return 0;
}

/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
 * program as well as to query the current xdp program id.
 */
static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
{
        struct ena_adapter *adapter = netdev_priv(netdev);

        switch (bpf->command) {
        case XDP_SETUP_PROG:
                return ena_xdp_set(netdev, bpf);
        case XDP_QUERY_PROG:
                bpf->prog_id = adapter->xdp_bpf_prog ?
                        adapter->xdp_bpf_prog->aux->id : 0;
                break;
        default:
                return -EINVAL;
        }
        return 0;
}

static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
{
#ifdef CONFIG_RFS_ACCEL
        u32 i;
        int rc;

        adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
        if (!adapter->netdev->rx_cpu_rmap)
                return -ENOMEM;
        for (i = 0; i < adapter->num_io_queues; i++) {
                int irq_idx = ENA_IO_IRQ_IDX(i);

                rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
                                      pci_irq_vector(adapter->pdev, irq_idx));
                if (rc) {
                        free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
                        adapter->netdev->rx_cpu_rmap = NULL;
                        return rc;
                }
        }
#endif /* CONFIG_RFS_ACCEL */
        return 0;
}

static void ena_init_io_rings_common(struct ena_adapter *adapter,
                                     struct ena_ring *ring, u16 qid)
{
        ring->qid = qid;
        ring->pdev = adapter->pdev;
        ring->dev = &adapter->pdev->dev;
        ring->netdev = adapter->netdev;
        ring->napi = &adapter->ena_napi[qid].napi;
        ring->adapter = adapter;
        ring->ena_dev = adapter->ena_dev;
        ring->per_napi_packets = 0;
        ring->cpu = 0;
        ring->first_interrupt = false;
        ring->no_interrupt_event_cnt = 0;
        u64_stats_init(&ring->syncp);
}

static void ena_init_io_rings(struct ena_adapter *adapter,
                              int first_index, int count)
{
        struct ena_com_dev *ena_dev;
        struct ena_ring *txr, *rxr;
        int i;

        ena_dev = adapter->ena_dev;

        for (i = first_index; i < first_index + count; i++) {
                txr = &adapter->tx_ring[i];
                rxr = &adapter->rx_ring[i];

                /* TX common ring state */
                ena_init_io_rings_common(adapter, txr, i);

                /* TX specific ring state */
                txr->ring_size = adapter->requested_tx_ring_size;
                txr->tx_max_header_size = ena_dev->tx_max_header_size;
                txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
                txr->sgl_size = adapter->max_tx_sgl_size;
                txr->smoothed_interval =
                        ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);

                /* Don't init RX queues for xdp queues */
                if (!ENA_IS_XDP_INDEX(adapter, i)) {
                        /* RX common ring state */
                        ena_init_io_rings_common(adapter, rxr, i);

                        /* RX specific ring state */
                        rxr->ring_size = adapter->requested_rx_ring_size;
                        rxr->rx_copybreak = adapter->rx_copybreak;
                        rxr->sgl_size = adapter->max_rx_sgl_size;
                        rxr->smoothed_interval =
                                ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
                        rxr->empty_rx_queue = 0;
                        adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
                }
        }
}

/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Return 0 on success, negative on failure
 */
static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
{
        struct ena_ring *tx_ring = &adapter->tx_ring[qid];
        struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
        int size, i, node;

        if (tx_ring->tx_buffer_info) {
                netif_err(adapter, ifup,
                          adapter->netdev, "tx_buffer_info is not NULL");
                return -EEXIST;
        }

        size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
        node = cpu_to_node(ena_irq->cpu);

        tx_ring->tx_buffer_info = vzalloc_node(size, node);
        if (!tx_ring->tx_buffer_info) {
                tx_ring->tx_buffer_info = vzalloc(size);
                if (!tx_ring->tx_buffer_info)
                        goto err_tx_buffer_info;
        }

        size = sizeof(u16) * tx_ring->ring_size;
        tx_ring->free_ids = vzalloc_node(size, node);
        if (!tx_ring->free_ids) {
                tx_ring->free_ids = vzalloc(size);
                if (!tx_ring->free_ids)
                        goto err_tx_free_ids;
        }

        size = tx_ring->tx_max_header_size;
        tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
        if (!tx_ring->push_buf_intermediate_buf) {
                tx_ring->push_buf_intermediate_buf = vzalloc(size);
                if (!tx_ring->push_buf_intermediate_buf)
                        goto err_push_buf_intermediate_buf;
        }

        /* Req id ring for TX out of order completions */
        for (i = 0; i < tx_ring->ring_size; i++)
                tx_ring->free_ids[i] = i;

        /* Reset tx statistics */
        memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));

        tx_ring->next_to_use = 0;
        tx_ring->next_to_clean = 0;
        tx_ring->cpu = ena_irq->cpu;
        return 0;

err_push_buf_intermediate_buf:
        vfree(tx_ring->free_ids);
        tx_ring->free_ids = NULL;
err_tx_free_ids:
        vfree(tx_ring->tx_buffer_info);
        tx_ring->tx_buffer_info = NULL;
err_tx_buffer_info:
        return -ENOMEM;
}

/* ena_free_tx_resources - Free I/O Tx Resources per Queue
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Free all transmit software resources
 */
static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
{
        struct ena_ring *tx_ring = &adapter->tx_ring[qid];

        vfree(tx_ring->tx_buffer_info);
        tx_ring->tx_buffer_info = NULL;

        vfree(tx_ring->free_ids);
        tx_ring->free_ids = NULL;

        vfree(tx_ring->push_buf_intermediate_buf);
        tx_ring->push_buf_intermediate_buf = NULL;
}

static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
                                           int first_index,
                                           int count)
{
        int i, rc = 0;

        for (i = first_index; i < first_index + count; i++) {
                rc = ena_setup_tx_resources(adapter, i);
                if (rc)
                        goto err_setup_tx;
        }

        return 0;

err_setup_tx:

        netif_err(adapter, ifup, adapter->netdev,
                  "Tx queue %d: allocation failed\n", i);

        /* rewind the index freeing the rings as we go */
        while (first_index < i--)
                ena_free_tx_resources(adapter, i);
        return rc;
}

static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
                                                  int first_index, int count)
{
        int i;

        for (i = first_index; i < first_index + count; i++)
                ena_free_tx_resources(adapter, i);
}

/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all transmit software resources
 */
static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
{
        ena_free_all_io_tx_resources_in_range(adapter,
                                              0,
                                              adapter->xdp_num_queues +
                                              adapter->num_io_queues);
}

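/* A completed Rx req_id must fall inside the ring; an out-of-range id points
 * to a device/driver inconsistency, so count it and trigger a device reset.
 */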
static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
{
        if (likely(req_id < rx_ring->ring_size))
                return 0;

        netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
                  "Invalid rx req_id: %hu\n", req_id);

        u64_stats_update_begin(&rx_ring->syncp);
        rx_ring->rx_stats.bad_req_id++;
        u64_stats_update_end(&rx_ring->syncp);

        /* Trigger device reset */
        rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
        set_bit(ENA_FLAG_TRIGGER_RESET, &rx_ring->adapter->flags);
        return -EFAULT;
}

/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Returns 0 on success, negative on failure
 */
static int ena_setup_rx_resources(struct ena_adapter *adapter,
                                  u32 qid)
{
        struct ena_ring *rx_ring = &adapter->rx_ring[qid];
        struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
        int size, node, i;

        if (rx_ring->rx_buffer_info) {
                netif_err(adapter, ifup, adapter->netdev,
                          "rx_buffer_info is not NULL");
                return -EEXIST;
        }

        /* alloc an extra element so that in the rx path
         * we can always prefetch rx_info + 1
         */
        size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
        node = cpu_to_node(ena_irq->cpu);

        rx_ring->rx_buffer_info = vzalloc_node(size, node);
        if (!rx_ring->rx_buffer_info) {
                rx_ring->rx_buffer_info = vzalloc(size);
                if (!rx_ring->rx_buffer_info)
                        return -ENOMEM;
        }

        size = sizeof(u16) * rx_ring->ring_size;
        rx_ring->free_ids = vzalloc_node(size, node);
        if (!rx_ring->free_ids) {
                rx_ring->free_ids = vzalloc(size);
                if (!rx_ring->free_ids) {
                        vfree(rx_ring->rx_buffer_info);
                        rx_ring->rx_buffer_info = NULL;
                        return -ENOMEM;
                }
        }

        /* Req id ring for receiving RX pkts out of order */
        for (i = 0; i < rx_ring->ring_size; i++)
                rx_ring->free_ids[i] = i;

        /* Reset rx statistics */
        memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));

        rx_ring->next_to_clean = 0;
        rx_ring->next_to_use = 0;
        rx_ring->cpu = ena_irq->cpu;

        return 0;
}

/* ena_free_rx_resources - Free I/O Rx Resources
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Free all receive software resources
 */
static void ena_free_rx_resources(struct ena_adapter *adapter,
                                  u32 qid)
{
        struct ena_ring *rx_ring = &adapter->rx_ring[qid];

        vfree(rx_ring->rx_buffer_info);
        rx_ring->rx_buffer_info = NULL;

        vfree(rx_ring->free_ids);
        rx_ring->free_ids = NULL;
}

/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 */
static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
{
        int i, rc = 0;

        for (i = 0; i < adapter->num_io_queues; i++) {
                rc = ena_setup_rx_resources(adapter, i);
                if (rc)
                        goto err_setup_rx;
        }

        return 0;

err_setup_rx:

        netif_err(adapter, ifup, adapter->netdev,
                  "Rx queue %d: allocation failed\n", i);

        /* rewind the index freeing the rings as we go */
        while (i--)
                ena_free_rx_resources(adapter, i);
        return rc;
}

/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all receive software resources
 */
static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_io_queues; i++)
                ena_free_rx_resources(adapter, i);
}

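/* Allocate and DMA-map a page for an Rx descriptor. The ring's rx_headroom
 * (non-zero when XDP is attached) is reserved at the start of the buffer.
 */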
static int ena_alloc_rx_page(struct ena_ring *rx_ring,
                                    struct ena_rx_buffer *rx_info, gfp_t gfp)
{
        struct ena_com_buf *ena_buf;
        struct page *page;
        dma_addr_t dma;

        /* if the previously allocated page is not used */
        if (unlikely(rx_info->page))
                return 0;

        page = alloc_page(gfp);
        if (unlikely(!page)) {
                u64_stats_update_begin(&rx_ring->syncp);
                rx_ring->rx_stats.page_alloc_fail++;
                u64_stats_update_end(&rx_ring->syncp);
                return -ENOMEM;
        }

        dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
                           DMA_FROM_DEVICE);
        if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
                u64_stats_update_begin(&rx_ring->syncp);
                rx_ring->rx_stats.dma_mapping_err++;
                u64_stats_update_end(&rx_ring->syncp);

                __free_page(page);
                return -EIO;
        }
        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
                  "alloc page %p, rx_info %p\n", page, rx_info);

        rx_info->page = page;
        rx_info->page_offset = 0;
        ena_buf = &rx_info->ena_buf;
        ena_buf->paddr = dma + rx_ring->rx_headroom;
        ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom;

        return 0;
}

static void ena_free_rx_page(struct ena_ring *rx_ring,
                             struct ena_rx_buffer *rx_info)
{
        struct page *page = rx_info->page;
        struct ena_com_buf *ena_buf = &rx_info->ena_buf;

        if (unlikely(!page)) {
                netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
                           "Trying to free unallocated buffer\n");
                return;
        }

        dma_unmap_page(rx_ring->dev,
                       ena_buf->paddr - rx_ring->rx_headroom,
                       ENA_PAGE_SIZE,
                       DMA_FROM_DEVICE);

        __free_page(page);
        rx_info->page = NULL;
}

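/* Post up to @num free Rx buffers to the Rx submission queue and ring the
 * doorbell. Returns the number of buffers actually added.
 */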
static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
{
        u16 next_to_use, req_id;
        u32 i;
        int rc;

        next_to_use = rx_ring->next_to_use;

        for (i = 0; i < num; i++) {
                struct ena_rx_buffer *rx_info;

                req_id = rx_ring->free_ids[next_to_use];

                rx_info = &rx_ring->rx_buffer_info[req_id];

                rc = ena_alloc_rx_page(rx_ring, rx_info,
                                       GFP_ATOMIC | __GFP_COMP);
                if (unlikely(rc < 0)) {
                        netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
                                   "failed to alloc buffer for rx queue %d\n",
                                   rx_ring->qid);
                        break;
                }
                rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
                                                &rx_info->ena_buf,
                                                req_id);
                if (unlikely(rc)) {
                        netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
                                   "failed to add buffer for rx queue %d\n",
                                   rx_ring->qid);
                        break;
                }
                next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
                                                   rx_ring->ring_size);
        }

        if (unlikely(i < num)) {
                u64_stats_update_begin(&rx_ring->syncp);
                rx_ring->rx_stats.refil_partial++;
                u64_stats_update_end(&rx_ring->syncp);
                netdev_warn(rx_ring->netdev,
                            "refilled rx qid %d with only %d buffers (from %d)\n",
                            rx_ring->qid, i, num);
        }

        /* ena_com_write_sq_doorbell issues a wmb() */
        if (likely(i))
                ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);

        rx_ring->next_to_use = next_to_use;

        return i;
}

static void ena_free_rx_bufs(struct ena_adapter *adapter,
                             u32 qid)
{
        struct ena_ring *rx_ring = &adapter->rx_ring[qid];
        u32 i;

        for (i = 0; i < rx_ring->ring_size; i++) {
                struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];

                if (rx_info->page)
                        ena_free_rx_page(rx_ring, rx_info);
        }
}

/* ena_refill_all_rx_bufs - allocate all queues Rx buffers
 * @adapter: board private structure
 */
static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
{
        struct ena_ring *rx_ring;
        int i, rc, bufs_num;

        for (i = 0; i < adapter->num_io_queues; i++) {
                rx_ring = &adapter->rx_ring[i];
                bufs_num = rx_ring->ring_size - 1;
                rc = ena_refill_rx_bufs(rx_ring, bufs_num);

                if (unlikely(rc != bufs_num))
                        netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
                                   "refilling Queue %d failed. allocated %d buffers from: %d\n",
                                   i, rc, bufs_num);
        }
}

static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_io_queues; i++)
                ena_free_rx_bufs(adapter, i);
}

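/* Undo the DMA mappings of a Tx descriptor: the linear part first (if it was
 * mapped), then the remaining frag pages.
 */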
static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
                              struct ena_tx_buffer *tx_info)
{
        struct ena_com_buf *ena_buf;
        u32 cnt;
        int i;

        ena_buf = tx_info->bufs;
        cnt = tx_info->num_of_bufs;

        if (unlikely(!cnt))
                return;

        if (tx_info->map_linear_data) {
                dma_unmap_single(tx_ring->dev,
                                 dma_unmap_addr(ena_buf, paddr),
                                 dma_unmap_len(ena_buf, len),
                                 DMA_TO_DEVICE);
                ena_buf++;
                cnt--;
        }

        /* unmap remaining mapped pages */
        for (i = 0; i < cnt; i++) {
                dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
                               dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
                ena_buf++;
        }
}

/* ena_free_tx_bufs - Free Tx Buffers per Queue
 * @tx_ring: TX ring for which buffers are to be freed
 */
static void ena_free_tx_bufs(struct ena_ring *tx_ring)
{
        bool print_once = true;
        u32 i;

        for (i = 0; i < tx_ring->ring_size; i++) {
                struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];

                if (!tx_info->skb)
                        continue;

                if (print_once) {
                        netdev_notice(tx_ring->netdev,
                                      "free uncompleted tx skb qid %d idx 0x%x\n",
                                      tx_ring->qid, i);
                        print_once = false;
                } else {
                        netdev_dbg(tx_ring->netdev,
                                   "free uncompleted tx skb qid %d idx 0x%x\n",
                                   tx_ring->qid, i);
                }

                ena_unmap_tx_buff(tx_ring, tx_info);

                dev_kfree_skb_any(tx_info->skb);
        }
        netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
                                                  tx_ring->qid));
}

static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
{
        struct ena_ring *tx_ring;
        int i;

        for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
                tx_ring = &adapter->tx_ring[i];
                ena_free_tx_bufs(tx_ring);
        }
}

static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
{
        u16 ena_qid;
        int i;

        for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
                ena_qid = ENA_IO_TXQ_IDX(i);
                ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
        }
}

static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
{
        u16 ena_qid;
        int i;

        for (i = 0; i < adapter->num_io_queues; i++) {
                ena_qid = ENA_IO_RXQ_IDX(i);
                cancel_work_sync(&adapter->ena_napi[i].dim.work);
                ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
        }
}

static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
{
        ena_destroy_all_tx_queues(adapter);
        ena_destroy_all_rx_queues(adapter);
}

static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
                                 struct ena_tx_buffer *tx_info, bool is_xdp)
{
        if (tx_info)
                netif_err(ring->adapter,
                          tx_done,
                          ring->netdev,
                          "tx_info doesn't have valid %s",
                          is_xdp ? "xdp frame" : "skb");
        else
                netif_err(ring->adapter,
                          tx_done,
                          ring->netdev,
                          "Invalid req_id: %hu\n",
                          req_id);

        u64_stats_update_begin(&ring->syncp);
        ring->tx_stats.bad_req_id++;
        u64_stats_update_end(&ring->syncp);

        /* Trigger device reset */
        ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
        set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags);
        return -EFAULT;
}

static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
{
        struct ena_tx_buffer *tx_info = NULL;

        if (likely(req_id < tx_ring->ring_size)) {
                tx_info = &tx_ring->tx_buffer_info[req_id];
                if (likely(tx_info->skb))
                        return 0;
        }

        return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
}

static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
{
        struct ena_tx_buffer *tx_info = NULL;

        if (likely(req_id < xdp_ring->ring_size)) {
                tx_info = &xdp_ring->tx_buffer_info[req_id];
                if (likely(tx_info->xdpf))
                        return 0;
        }

        return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
}

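/* Reclaim up to @budget completed Tx packets: unmap and free each skb,
 * return the descriptors to the free list and wake the Tx queue when enough
 * space becomes available again.
 */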
static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
{
        struct netdev_queue *txq;
        bool above_thresh;
        u32 tx_bytes = 0;
        u32 total_done = 0;
        u16 next_to_clean;
        u16 req_id;
        int tx_pkts = 0;
        int rc;

        next_to_clean = tx_ring->next_to_clean;
        txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);

        while (tx_pkts < budget) {
                struct ena_tx_buffer *tx_info;
                struct sk_buff *skb;

                rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
                                                &req_id);
                if (rc)
                        break;

                rc = validate_tx_req_id(tx_ring, req_id);
                if (rc)
                        break;

                tx_info = &tx_ring->tx_buffer_info[req_id];
                skb = tx_info->skb;

                /* prefetch skb_end_pointer() to speed up skb_shinfo(skb) */
                prefetch(&skb->end);

                tx_info->skb = NULL;
                tx_info->last_jiffies = 0;

                ena_unmap_tx_buff(tx_ring, tx_info);

                netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
                          "tx_poll: q %d skb %p completed\n", tx_ring->qid,
                          skb);

                tx_bytes += skb->len;
                dev_kfree_skb(skb);
                tx_pkts++;
                total_done += tx_info->tx_descs;

                tx_ring->free_ids[next_to_clean] = req_id;
                next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
                                                     tx_ring->ring_size);
        }

        tx_ring->next_to_clean = next_to_clean;
        ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
        ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);

        netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);

        netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
                  "tx_poll: q %d done. total pkts: %d\n",
                  tx_ring->qid, tx_pkts);

        /* need to make the ring's circular update visible to
         * ena_start_xmit() before checking for netif_queue_stopped().
         */
        smp_mb();

        above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
                                                    ENA_TX_WAKEUP_THRESH);
        if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
                __netif_tx_lock(txq, smp_processor_id());
                above_thresh =
                        ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
                                                     ENA_TX_WAKEUP_THRESH);
                if (netif_tx_queue_stopped(txq) && above_thresh &&
                    test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
                        netif_tx_wake_queue(txq);
                        u64_stats_update_begin(&tx_ring->syncp);
                        tx_ring->tx_stats.queue_wakeup++;
                        u64_stats_update_end(&tx_ring->syncp);
                }
                __netif_tx_unlock(txq);
        }

        return tx_pkts;
}

static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags)
{
        struct sk_buff *skb;

        if (frags)
                skb = napi_get_frags(rx_ring->napi);
        else
                skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
                                                rx_ring->rx_copybreak);

        if (unlikely(!skb)) {
                u64_stats_update_begin(&rx_ring->syncp);
                rx_ring->rx_stats.skb_alloc_fail++;
                u64_stats_update_end(&rx_ring->syncp);
                netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
                          "Failed to allocate skb. frags: %d\n", frags);
                return NULL;
        }

        return skb;
}

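/* Build an skb for a received packet. Packets up to rx_copybreak bytes are
 * copied into a freshly allocated linear skb; larger packets attach the Rx
 * pages as frags to an skb obtained from napi_get_frags().
 */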
1365static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
1366                                  struct ena_com_rx_buf_info *ena_bufs,
1367                                  u32 descs,
1368                                  u16 *next_to_clean)
1369{
1370        struct sk_buff *skb;
1371        struct ena_rx_buffer *rx_info;
1372        u16 len, req_id, buf = 0;
1373        void *va;
1374        int rc;
1375
1376        len = ena_bufs[buf].len;
1377        req_id = ena_bufs[buf].req_id;
1378
1379        rc = validate_rx_req_id(rx_ring, req_id);
1380        if (unlikely(rc < 0))
1381                return NULL;
1382
1383        rx_info = &rx_ring->rx_buffer_info[req_id];
1384
1385        if (unlikely(!rx_info->page)) {
1386                netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
1387                          "Page is NULL\n");
1388                return NULL;
1389        }
1390
1391        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1392                  "rx_info %p page %p\n",
1393                  rx_info, rx_info->page);
1394
1395        /* save virt address of first buffer */
1396        va = page_address(rx_info->page) + rx_info->page_offset;
1397        prefetch(va + NET_IP_ALIGN);
1398
1399        if (len <= rx_ring->rx_copybreak) {
1400                skb = ena_alloc_skb(rx_ring, false);
1401                if (unlikely(!skb))
1402                        return NULL;
1403
1404                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1405                          "rx allocated small packet. len %d. data_len %d\n",
1406                          skb->len, skb->data_len);
1407
1408                /* sync this buffer for CPU use */
1409                dma_sync_single_for_cpu(rx_ring->dev,
1410                                        dma_unmap_addr(&rx_info->ena_buf, paddr),
1411                                        len,
1412                                        DMA_FROM_DEVICE);
1413                skb_copy_to_linear_data(skb, va, len);
1414                dma_sync_single_for_device(rx_ring->dev,
1415                                           dma_unmap_addr(&rx_info->ena_buf, paddr),
1416                                           len,
1417                                           DMA_FROM_DEVICE);
1418
1419                skb_put(skb, len);
1420                skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1421                rx_ring->free_ids[*next_to_clean] = req_id;
1422                *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
1423                                                     rx_ring->ring_size);
1424                return skb;
1425        }
1426
1427        skb = ena_alloc_skb(rx_ring, true);
1428        if (unlikely(!skb))
1429                return NULL;
1430
1431        do {
1432                dma_unmap_page(rx_ring->dev,
1433                               dma_unmap_addr(&rx_info->ena_buf, paddr),
1434                               ENA_PAGE_SIZE, DMA_FROM_DEVICE);
1435
1436                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
1437                                rx_info->page_offset, len, ENA_PAGE_SIZE);
1438                /* The offset is non zero only for the first buffer */
1439                rx_info->page_offset = 0;
1440
1441                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1442                          "rx skb updated. len %d. data_len %d\n",
1443                          skb->len, skb->data_len);
1444
1445                rx_info->page = NULL;
1446
1447                rx_ring->free_ids[*next_to_clean] = req_id;
1448                *next_to_clean =
1449                        ENA_RX_RING_IDX_NEXT(*next_to_clean,
1450                                             rx_ring->ring_size);
1451                if (likely(--descs == 0))
1452                        break;
1453
1454                buf++;
1455                len = ena_bufs[buf].len;
1456                req_id = ena_bufs[buf].req_id;
1457
1458                rc = validate_rx_req_id(rx_ring, req_id);
1459                if (unlikely(rc < 0))
1460                        return NULL;
1461
1462                rx_info = &rx_ring->rx_buffer_info[req_id];
1463        } while (1);
1464
1465        return skb;
1466}
1467
1468/* ena_rx_checksum - indicate in skb if hw indicated a good cksum
1469 * @rx_ring: the RX ring from which the packet was received
1470 * @ena_rx_ctx: received packet context/metadata
1471 * @skb: skb currently being received and modified
1472 */
1473static void ena_rx_checksum(struct ena_ring *rx_ring,
1474                                   struct ena_com_rx_ctx *ena_rx_ctx,
1475                                   struct sk_buff *skb)
1476{
1477        /* Rx csum disabled */
1478        if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
1479                skb->ip_summed = CHECKSUM_NONE;
1480                return;
1481        }
1482
1483        /* For fragmented packets the checksum isn't valid */
1484        if (ena_rx_ctx->frag) {
1485                skb->ip_summed = CHECKSUM_NONE;
1486                return;
1487        }
1488
1489        /* if IP and error */
1490        if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1491                     (ena_rx_ctx->l3_csum_err))) {
1492                /* ipv4 checksum error */
1493                skb->ip_summed = CHECKSUM_NONE;
1494                u64_stats_update_begin(&rx_ring->syncp);
1495                rx_ring->rx_stats.bad_csum++;
1496                u64_stats_update_end(&rx_ring->syncp);
1497                netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1498                          "RX IPv4 header checksum error\n");
1499                return;
1500        }
1501
1502        /* if TCP/UDP */
1503        if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1504                   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
1505                if (unlikely(ena_rx_ctx->l4_csum_err)) {
1506                        /* TCP/UDP checksum error */
1507                        u64_stats_update_begin(&rx_ring->syncp);
1508                        rx_ring->rx_stats.bad_csum++;
1509                        u64_stats_update_end(&rx_ring->syncp);
1510                        netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1511                                  "RX L4 checksum error\n");
1512                        skb->ip_summed = CHECKSUM_NONE;
1513                        return;
1514                }
1515
1516                if (likely(ena_rx_ctx->l4_csum_checked)) {
1517                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1518                        u64_stats_update_begin(&rx_ring->syncp);
1519                        rx_ring->rx_stats.csum_good++;
1520                        u64_stats_update_end(&rx_ring->syncp);
1521                } else {
1522                        u64_stats_update_begin(&rx_ring->syncp);
1523                        rx_ring->rx_stats.csum_unchecked++;
1524                        u64_stats_update_end(&rx_ring->syncp);
1525                        skb->ip_summed = CHECKSUM_NONE;
1526                }
1527        } else {
1528                skb->ip_summed = CHECKSUM_NONE;
1529                return;
1530        }
1531
1532}
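
/* Editorial sketch, not part of the upstream driver: the checksum decision
 * above condensed into a standalone helper, so the precedence of the checks
 * is easy to see (fragmented packets and L3 errors win over everything, and
 * only a device-verified TCP/UDP checksum yields CHECKSUM_UNNECESSARY).
 * The hw_rx_csum struct and sketch_rx_csum() name are illustrative, not
 * driver API.
 */
struct hw_rx_csum {
        bool frag;              /* fragmented packet, csum not valid */
        bool is_ipv4;
        bool l3_csum_err;       /* IPv4 header checksum failed */
        bool is_tcp_udp;
        bool l4_csum_err;       /* L4 checksum failed */
        bool l4_csum_checked;   /* device actually verified the L4 csum */
};

static u8 sketch_rx_csum(const struct hw_rx_csum *c, bool rxcsum_enabled)
{
        if (!rxcsum_enabled || c->frag)
                return CHECKSUM_NONE;
        if (c->is_ipv4 && c->l3_csum_err)
                return CHECKSUM_NONE;           /* counted as bad_csum */
        if (!c->is_tcp_udp || c->l4_csum_err)
                return CHECKSUM_NONE;
        return c->l4_csum_checked ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
}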
1533
1534static void ena_set_rx_hash(struct ena_ring *rx_ring,
1535                            struct ena_com_rx_ctx *ena_rx_ctx,
1536                            struct sk_buff *skb)
1537{
1538        enum pkt_hash_types hash_type;
1539
1540        if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
1541                if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1542                           (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
1543
1544                        hash_type = PKT_HASH_TYPE_L4;
1545                else
1546                        hash_type = PKT_HASH_TYPE_NONE;
1547
1548                /* Override hash type if the packet is fragmented */
1549                if (ena_rx_ctx->frag)
1550                        hash_type = PKT_HASH_TYPE_NONE;
1551
1552                skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
1553        }
1554}
1555
1556static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
1557{
1558        struct ena_rx_buffer *rx_info;
1559        int ret;
1560
1561        rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1562        xdp->data = page_address(rx_info->page) +
1563                rx_info->page_offset + rx_ring->rx_headroom;
1564        xdp_set_data_meta_invalid(xdp);
1565        xdp->data_hard_start = page_address(rx_info->page);
1566        xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len;
1567        /* If for some reason we received a bigger packet than
1568         * we expect, then we simply drop it
1569         */
1570        if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
1571                return XDP_DROP;
1572
1573        ret = ena_xdp_execute(rx_ring, xdp, rx_info);
1574
1575        /* The xdp program might expand the headers */
1576        if (ret == XDP_PASS) {
1577                rx_info->page_offset = xdp->data - xdp->data_hard_start;
1578                rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
1579        }
1580
1581        return ret;
1582}
1583/* ena_clean_rx_irq - Cleanup RX irq
1584 * @rx_ring: RX ring to clean
1585 * @napi: napi handler
1586 * @budget: how many packets driver is allowed to clean
1587 *
1588 * Returns the number of cleaned buffers.
1589 */
1590static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
1591                            u32 budget)
1592{
1593        u16 next_to_clean = rx_ring->next_to_clean;
1594        struct ena_com_rx_ctx ena_rx_ctx;
1595        struct ena_rx_buffer *rx_info;
1596        struct ena_adapter *adapter;
1597        u32 res_budget, work_done;
1598        int rx_copybreak_pkt = 0;
1599        int refill_threshold;
1600        struct sk_buff *skb;
1601        int refill_required;
1602        struct xdp_buff xdp;
1603        int total_len = 0;
1604        int xdp_verdict;
1605        int rc = 0;
1606        int i;
1607
1608        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1609                  "%s qid %d\n", __func__, rx_ring->qid);
1610        res_budget = budget;
1611        xdp.rxq = &rx_ring->xdp_rxq;
1612        xdp.frame_sz = ENA_PAGE_SIZE;
1613
1614        do {
1615                xdp_verdict = XDP_PASS;
1616                skb = NULL;
1617                ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1618                ena_rx_ctx.max_bufs = rx_ring->sgl_size;
1619                ena_rx_ctx.descs = 0;
1620                ena_rx_ctx.pkt_offset = 0;
1621                rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
1622                                    rx_ring->ena_com_io_sq,
1623                                    &ena_rx_ctx);
1624                if (unlikely(rc))
1625                        goto error;
1626
1627                if (unlikely(ena_rx_ctx.descs == 0))
1628                        break;
1629
1630                rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1631                rx_info->page_offset = ena_rx_ctx.pkt_offset;
1632
1633                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1634                          "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1635                          rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1636                          ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1637
1638                if (ena_xdp_present_ring(rx_ring))
1639                        xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
1640
1641                /* allocate skb and fill it */
1642                if (xdp_verdict == XDP_PASS)
1643                        skb = ena_rx_skb(rx_ring,
1644                                         rx_ring->ena_bufs,
1645                                         ena_rx_ctx.descs,
1646                                         &next_to_clean);
1647
1648                if (unlikely(!skb)) {
1649                        if (xdp_verdict == XDP_TX)
1650                                ena_free_rx_page(rx_ring,
1651                                                 &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]);
1652                        for (i = 0; i < ena_rx_ctx.descs; i++) {
1653                                rx_ring->free_ids[next_to_clean] =
1654                                        rx_ring->ena_bufs[i].req_id;
1655                                next_to_clean =
1656                                        ENA_RX_RING_IDX_NEXT(next_to_clean,
1657                                                             rx_ring->ring_size);
1658                        }
1659                        if (xdp_verdict != XDP_PASS) {
1660                                res_budget--;
1661                                continue;
1662                        }
1663                        break;
1664                }
1665
1666                ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
1667
1668                ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
1669
1670                skb_record_rx_queue(skb, rx_ring->qid);
1671
1672                if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) {
1673                        total_len += rx_ring->ena_bufs[0].len;
1674                        rx_copybreak_pkt++;
1675                        napi_gro_receive(napi, skb);
1676                } else {
1677                        total_len += skb->len;
1678                        napi_gro_frags(napi);
1679                }
1680
1681                res_budget--;
1682        } while (likely(res_budget));
1683
1684        work_done = budget - res_budget;
1685        rx_ring->per_napi_packets += work_done;
1686        u64_stats_update_begin(&rx_ring->syncp);
1687        rx_ring->rx_stats.bytes += total_len;
1688        rx_ring->rx_stats.cnt += work_done;
1689        rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
1690        u64_stats_update_end(&rx_ring->syncp);
1691
1692        rx_ring->next_to_clean = next_to_clean;
1693
1694        refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
1695        refill_threshold =
1696                min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
1697                      ENA_RX_REFILL_THRESH_PACKET);
1698
1699        /* Optimization, try to batch new rx buffers */
1700        if (refill_required > refill_threshold) {
1701                ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1702                ena_refill_rx_bufs(rx_ring, refill_required);
1703        }
1704
1705        return work_done;
1706
1707error:
1708        adapter = netdev_priv(rx_ring->netdev);
1709
1710        u64_stats_update_begin(&rx_ring->syncp);
1711        rx_ring->rx_stats.bad_desc_num++;
1712        u64_stats_update_end(&rx_ring->syncp);
1713
1714        /* Too many desc from the device. Trigger reset */
1715        adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
1716        set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
1717
1718        return 0;
1719}
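
/* Editorial sketch, not part of the upstream driver: the refill batching
 * decision made at the end of ena_clean_rx_irq(), as plain arithmetic.  The
 * ring is only refilled once enough descriptors have been consumed, which
 * batches page allocations and doorbell writes.  The divider and packet
 * threshold below mirror ENA_RX_REFILL_THRESH_DIVIDER /
 * ENA_RX_REFILL_THRESH_PACKET but are assumptions here; ena_netdev.h is
 * authoritative.
 */
static bool sketch_should_refill_rx(int free_q_entries, int ring_size)
{
        int divider = 8;        /* assumed ENA_RX_REFILL_THRESH_DIVIDER */
        int pkt_thresh = 256;   /* assumed ENA_RX_REFILL_THRESH_PACKET */
        int threshold = ring_size / divider;

        if (threshold > pkt_thresh)
                threshold = pkt_thresh; /* min_t(int, ...) in the driver */

        return free_q_entries > threshold;
}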
1720
1721static void ena_dim_work(struct work_struct *w)
1722{
1723        struct dim *dim = container_of(w, struct dim, work);
1724        struct dim_cq_moder cur_moder =
1725                net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
1726        struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
1727
1728        ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
1729        dim->state = DIM_START_MEASURE;
1730}
1731
1732static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
1733{
1734        struct dim_sample dim_sample;
1735        struct ena_ring *rx_ring = ena_napi->rx_ring;
1736
1737        if (!rx_ring->per_napi_packets)
1738                return;
1739
1740        rx_ring->non_empty_napi_events++;
1741
1742        dim_update_sample(rx_ring->non_empty_napi_events,
1743                          rx_ring->rx_stats.cnt,
1744                          rx_ring->rx_stats.bytes,
1745                          &dim_sample);
1746
1747        net_dim(&ena_napi->dim, dim_sample);
1748
1749        rx_ring->per_napi_packets = 0;
1750}
1751
1752static void ena_unmask_interrupt(struct ena_ring *tx_ring,
1753                                        struct ena_ring *rx_ring)
1754{
1755        struct ena_eth_io_intr_reg intr_reg;
1756        u32 rx_interval = 0;
1757        /* Rx ring can be NULL for XDP Tx queues, which don't have an
1758         * accompanying rx_ring pair.
1759         */
1760        if (rx_ring)
1761                rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
1762                        rx_ring->smoothed_interval :
1763                        ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
1764
1765        /* Update intr register: rx intr delay,
1766         * tx intr delay and interrupt unmask
1767         */
1768        ena_com_update_intr_reg(&intr_reg,
1769                                rx_interval,
1770                                tx_ring->smoothed_interval,
1771                                true);
1772
1773        u64_stats_update_begin(&tx_ring->syncp);
1774        tx_ring->tx_stats.unmask_interrupt++;
1775        u64_stats_update_end(&tx_ring->syncp);
1776        /* It is a shared MSI-X vector.
1777         * Both the Tx and Rx CQs point to it, so either one can be used
1778         * to reach the interrupt register.
1779         * The Tx ring is used here because rx_ring is NULL for XDP queues.
1780         */
1781        ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
1782}
1783
1784static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1785                                             struct ena_ring *rx_ring)
1786{
1787        int cpu = get_cpu();
1788        int numa_node;
1789
1790        /* Check only one ring since the 2 rings are running on the same cpu */
1791        if (likely(tx_ring->cpu == cpu))
1792                goto out;
1793
1794        numa_node = cpu_to_node(cpu);
1795        put_cpu();
1796
1797        if (numa_node != NUMA_NO_NODE) {
1798                ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1799                if (rx_ring)
1800                        ena_com_update_numa_node(rx_ring->ena_com_io_cq,
1801                                                 numa_node);
1802        }
1803
1804        tx_ring->cpu = cpu;
1805        if (rx_ring)
1806                rx_ring->cpu = cpu;
1807
1808        return;
1809out:
1810        put_cpu();
1811}
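
/* Editorial sketch, not part of the upstream driver: the get_cpu()/put_cpu()
 * pattern used above.  get_cpu() disables preemption so the returned CPU
 * number stays meaningful while it is used; the NUMA node derived from it is
 * a plain integer copy and remains valid after put_cpu() re-enables
 * preemption.  sketch_sample_numa_node() is an illustrative name.
 */
static int sketch_sample_numa_node(void)
{
        int cpu = get_cpu();            /* disables preemption */
        int node = cpu_to_node(cpu);    /* may be NUMA_NO_NODE */

        put_cpu();                      /* re-enables preemption */

        return node;
}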
1812
1813static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
1814{
1815        u32 total_done = 0;
1816        u16 next_to_clean;
1817        u32 tx_bytes = 0;
1818        int tx_pkts = 0;
1819        u16 req_id;
1820        int rc;
1821
1822        if (unlikely(!xdp_ring))
1823                return 0;
1824        next_to_clean = xdp_ring->next_to_clean;
1825
1826        while (tx_pkts < budget) {
1827                struct ena_tx_buffer *tx_info;
1828                struct xdp_frame *xdpf;
1829
1830                rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
1831                                                &req_id);
1832                if (rc)
1833                        break;
1834
1835                rc = validate_xdp_req_id(xdp_ring, req_id);
1836                if (rc)
1837                        break;
1838
1839                tx_info = &xdp_ring->tx_buffer_info[req_id];
1840                xdpf = tx_info->xdpf;
1841
1842                tx_info->xdpf = NULL;
1843                tx_info->last_jiffies = 0;
1844                ena_unmap_tx_buff(xdp_ring, tx_info);
1845
1846                netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
1847                          "tx_poll: q %d xdpf %p completed\n", xdp_ring->qid,
1848                          xdpf);
1849
1850                tx_bytes += xdpf->len;
1851                tx_pkts++;
1852                total_done += tx_info->tx_descs;
1853
1854                __free_page(tx_info->xdp_rx_page);
1855                xdp_ring->free_ids[next_to_clean] = req_id;
1856                next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1857                                                     xdp_ring->ring_size);
1858        }
1859
1860        xdp_ring->next_to_clean = next_to_clean;
1861        ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
1862        ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
1863
1864        netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
1865                  "tx_poll: q %d done. total pkts: %d\n",
1866                  xdp_ring->qid, tx_pkts);
1867
1868        return tx_pkts;
1869}
1870
1871static int ena_io_poll(struct napi_struct *napi, int budget)
1872{
1873        struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
1874        struct ena_ring *tx_ring, *rx_ring;
1875        int tx_work_done;
1876        int rx_work_done = 0;
1877        int tx_budget;
1878        int napi_comp_call = 0;
1879        int ret;
1880
1881        tx_ring = ena_napi->tx_ring;
1882        rx_ring = ena_napi->rx_ring;
1883
1884        tx_ring->first_interrupt = ena_napi->first_interrupt;
1885        rx_ring->first_interrupt = ena_napi->first_interrupt;
1886
1887        tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
1888
1889        if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1890            test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
1891                napi_complete_done(napi, 0);
1892                return 0;
1893        }
1894
1895        tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
1896        /* On netpoll the budget is zero and the handler should only clean the
1897         * tx completions.
1898         */
1899        if (likely(budget))
1900                rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
1901
1902        /* If the device is about to reset or is down, avoid unmasking
1903         * the interrupt and return 0 so NAPI won't reschedule
1904         */
1905        if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1906                     test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
1907                napi_complete_done(napi, 0);
1908                ret = 0;
1909
1910        } else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
1911                napi_comp_call = 1;
1912
1913                /* Update NUMA and unmask the interrupt only when scheduled
1914                 * from interrupt context (as opposed to from sk_busy_loop)
1915                 */
1916                if (napi_complete_done(napi, rx_work_done)) {
1917                        /* We apply adaptive moderation on Rx path only.
1918                         * Tx uses static interrupt moderation.
1919                         */
1920                        if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
1921                                ena_adjust_adaptive_rx_intr_moderation(ena_napi);
1922
1923                        ena_unmask_interrupt(tx_ring, rx_ring);
1924                }
1925
1926                ena_update_ring_numa_node(tx_ring, rx_ring);
1927
1928                ret = rx_work_done;
1929        } else {
1930                ret = budget;
1931        }
1932
1933        u64_stats_update_begin(&tx_ring->syncp);
1934        tx_ring->tx_stats.napi_comp += napi_comp_call;
1935        tx_ring->tx_stats.tx_poll++;
1936        u64_stats_update_end(&tx_ring->syncp);
1937
1938        return ret;
1939}
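
/* Editorial sketch, not part of the upstream driver: the core NAPI contract
 * that ena_io_poll() follows.  Returning the full budget keeps the poller
 * scheduled; napi_complete_done() is called, and the device interrupt
 * re-enabled, only when less work than the budget was done and NAPI agrees
 * the poll is really over (it returns false when busy polling owns the
 * context).  The cleanup and unmask steps are left as comments because they
 * are driver specific.
 */
static int sketch_napi_poll(struct napi_struct *napi, int budget)
{
        /* Driver-specific TX/RX cleanup would go here and report how many
         * packets were processed, bounded by budget.
         */
        int work_done = 0;

        if (work_done < budget && napi_complete_done(napi, work_done)) {
                /* Unmask/re-enable the device interrupt here. */
        }

        return work_done;
}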
1940
1941static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
1942{
1943        struct ena_adapter *adapter = (struct ena_adapter *)data;
1944
1945        ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1946
1947        /* Don't call the aenq handler before probe is done */
1948        if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
1949                ena_com_aenq_intr_handler(adapter->ena_dev, data);
1950
1951        return IRQ_HANDLED;
1952}
1953
1954/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
1955 * @irq: interrupt number
1956 * @data: pointer to a network interface private napi device structure
1957 */
1958static irqreturn_t ena_intr_msix_io(int irq, void *data)
1959{
1960        struct ena_napi *ena_napi = data;
1961
1962        ena_napi->first_interrupt = true;
1963
1964        napi_schedule_irqoff(&ena_napi->napi);
1965
1966        return IRQ_HANDLED;
1967}
1968
1969/* Reserve a single MSI-X vector for management (admin + AENQ),
1970 * plus one vector for each potential I/O queue.
1971 * The number of potential I/O queues is the minimum of what the device
1972 * supports and the number of vCPUs.
1973 */
1974static int ena_enable_msix(struct ena_adapter *adapter)
1975{
1976        int msix_vecs, irq_cnt;
1977
1978        if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1979                netif_err(adapter, probe, adapter->netdev,
1980                          "Error, MSI-X is already enabled\n");
1981                return -EPERM;
1982        }
1983
1984        /* Reserve the max MSI-X vectors we might need */
1985        msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1986        netif_dbg(adapter, probe, adapter->netdev,
1987                  "trying to enable MSI-X, vectors %d\n", msix_vecs);
1988
1989        irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
1990                                        msix_vecs, PCI_IRQ_MSIX);
1991
1992        if (irq_cnt < 0) {
1993                netif_err(adapter, probe, adapter->netdev,
1994                          "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
1995                return -ENOSPC;
1996        }
1997
1998        if (irq_cnt != msix_vecs) {
1999                netif_notice(adapter, probe, adapter->netdev,
2000                             "enable only %d MSI-X (out of %d), reduce the number of queues\n",
2001                             irq_cnt, msix_vecs);
2002                adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
2003        }
2004
2005        if (ena_init_rx_cpu_rmap(adapter))
2006                netif_warn(adapter, probe, adapter->netdev,
2007                           "Failed to map IRQs to CPUs\n");
2008
2009        adapter->msix_vecs = irq_cnt;
2010        set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
2011
2012        return 0;
2013}
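
/* Editorial sketch, not part of the upstream driver: the vector budget
 * requested above.  One vector is reserved for admin/AENQ management plus
 * one per potential I/O queue, so ENA_MAX_MSIX_VEC(n) works out to n + 1
 * (an assumption based on the usage here; see ena_netdev.h for the macro
 * and ENA_ADMIN_MSIX_VEC).
 */
static int sketch_msix_vectors_needed(int num_io_queues)
{
        int mgmnt_vecs = 1;     /* assumed ENA_ADMIN_MSIX_VEC */

        return mgmnt_vecs + num_io_queues;
}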
2014
2015static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
2016{
2017        u32 cpu;
2018
2019        snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
2020                 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
2021                 pci_name(adapter->pdev));
2022        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
2023                ena_intr_msix_mgmnt;
2024        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
2025        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
2026                pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
2027        cpu = cpumask_first(cpu_online_mask);
2028        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
2029        cpumask_set_cpu(cpu,
2030                        &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
2031}
2032
2033static void ena_setup_io_intr(struct ena_adapter *adapter)
2034{
2035        struct net_device *netdev;
2036        int irq_idx, i, cpu;
2037        int io_queue_count;
2038
2039        netdev = adapter->netdev;
2040        io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2041
2042        for (i = 0; i < io_queue_count; i++) {
2043                irq_idx = ENA_IO_IRQ_IDX(i);
2044                cpu = i % num_online_cpus();
2045
2046                snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
2047                         "%s-Tx-Rx-%d", netdev->name, i);
2048                adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
2049                adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
2050                adapter->irq_tbl[irq_idx].vector =
2051                        pci_irq_vector(adapter->pdev, irq_idx);
2052                adapter->irq_tbl[irq_idx].cpu = cpu;
2053
2054                cpumask_set_cpu(cpu,
2055                                &adapter->irq_tbl[irq_idx].affinity_hint_mask);
2056        }
2057}
2058
2059static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
2060{
2061        unsigned long flags = 0;
2062        struct ena_irq *irq;
2063        int rc;
2064
2065        irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2066        rc = request_irq(irq->vector, irq->handler, flags, irq->name,
2067                         irq->data);
2068        if (rc) {
2069                netif_err(adapter, probe, adapter->netdev,
2070                          "failed to request admin irq\n");
2071                return rc;
2072        }
2073
2074        netif_dbg(adapter, probe, adapter->netdev,
2075                  "Set affinity hint of mgmnt irq to 0x%lx (irq vector: %d)\n",
2076                  irq->affinity_hint_mask.bits[0], irq->vector);
2077
2078        irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
2079
2080        return rc;
2081}
2082
2083static int ena_request_io_irq(struct ena_adapter *adapter)
2084{
2085        u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2086        unsigned long flags = 0;
2087        struct ena_irq *irq;
2088        int rc = 0, i, k;
2089
2090        if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
2091                netif_err(adapter, ifup, adapter->netdev,
2092                          "Failed to request I/O IRQ: MSI-X is not enabled\n");
2093                return -EINVAL;
2094        }
2095
2096        for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
2097                irq = &adapter->irq_tbl[i];
2098                rc = request_irq(irq->vector, irq->handler, flags, irq->name,
2099                                 irq->data);
2100                if (rc) {
2101                        netif_err(adapter, ifup, adapter->netdev,
2102                                  "Failed to request I/O IRQ. index %d rc %d\n",
2103                                   i, rc);
2104                        goto err;
2105                }
2106
2107                netif_dbg(adapter, ifup, adapter->netdev,
2108                          "Set affinity hint of irq index %d to 0x%lx (irq vector: %d)\n",
2109                          i, irq->affinity_hint_mask.bits[0], irq->vector);
2110
2111                irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
2112        }
2113
2114        return rc;
2115
2116err:
2117        for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
2118                irq = &adapter->irq_tbl[k];
2119                free_irq(irq->vector, irq->data);
2120        }
2121
2122        return rc;
2123}
2124
2125static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
2126{
2127        struct ena_irq *irq;
2128
2129        irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2130        synchronize_irq(irq->vector);
2131        irq_set_affinity_hint(irq->vector, NULL);
2132        free_irq(irq->vector, irq->data);
2133}
2134
2135static void ena_free_io_irq(struct ena_adapter *adapter)
2136{
2137        u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2138        struct ena_irq *irq;
2139        int i;
2140
2141#ifdef CONFIG_RFS_ACCEL
2142        if (adapter->msix_vecs >= 1) {
2143                free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
2144                adapter->netdev->rx_cpu_rmap = NULL;
2145        }
2146#endif /* CONFIG_RFS_ACCEL */
2147
2148        for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
2149                irq = &adapter->irq_tbl[i];
2150                irq_set_affinity_hint(irq->vector, NULL);
2151                free_irq(irq->vector, irq->data);
2152        }
2153}
2154
2155static void ena_disable_msix(struct ena_adapter *adapter)
2156{
2157        if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
2158                pci_free_irq_vectors(adapter->pdev);
2159}
2160
2161static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
2162{
2163        u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2164        int i;
2165
2166        if (!netif_running(adapter->netdev))
2167                return;
2168
2169        for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
2170                synchronize_irq(adapter->irq_tbl[i].vector);
2171}
2172
2173static void ena_del_napi_in_range(struct ena_adapter *adapter,
2174                                  int first_index,
2175                                  int count)
2176{
2177        int i;
2178
2179        for (i = first_index; i < first_index + count; i++) {
2180                /* Check if napi was initialized before */
2181                if (!ENA_IS_XDP_INDEX(adapter, i) ||
2182                    adapter->ena_napi[i].xdp_ring)
2183                        netif_napi_del(&adapter->ena_napi[i].napi);
2184                else
2185                        WARN_ON(ENA_IS_XDP_INDEX(adapter, i) &&
2186                                adapter->ena_napi[i].xdp_ring);
2187        }
2188}
2189
2190static void ena_init_napi_in_range(struct ena_adapter *adapter,
2191                                   int first_index, int count)
2192{
2193        struct ena_napi *napi = {0};
2194        int i;
2195
2196        for (i = first_index; i < first_index + count; i++) {
2197                napi = &adapter->ena_napi[i];
2198
2199                netif_napi_add(adapter->netdev,
2200                               &adapter->ena_napi[i].napi,
2201                               ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll,
2202                               ENA_NAPI_BUDGET);
2203
2204                if (!ENA_IS_XDP_INDEX(adapter, i)) {
2205                        napi->rx_ring = &adapter->rx_ring[i];
2206                        napi->tx_ring = &adapter->tx_ring[i];
2207                } else {
2208                        napi->xdp_ring = &adapter->tx_ring[i];
2209                }
2210                napi->qid = i;
2211        }
2212}
2213
2214static void ena_napi_disable_in_range(struct ena_adapter *adapter,
2215                                      int first_index,
2216                                      int count)
2217{
2218        int i;
2219
2220        for (i = first_index; i < first_index + count; i++)
2221                napi_disable(&adapter->ena_napi[i].napi);
2222}
2223
2224static void ena_napi_enable_in_range(struct ena_adapter *adapter,
2225                                     int first_index,
2226                                     int count)
2227{
2228        int i;
2229
2230        for (i = first_index; i < first_index + count; i++)
2231                napi_enable(&adapter->ena_napi[i].napi);
2232}
2233
2234/* Configure the Rx forwarding */
2235static int ena_rss_configure(struct ena_adapter *adapter)
2236{
2237        struct ena_com_dev *ena_dev = adapter->ena_dev;
2238        int rc;
2239
2240        /* In case the RSS table wasn't initialized by probe */
2241        if (!ena_dev->rss.tbl_log_size) {
2242                rc = ena_rss_init_default(adapter);
2243                if (rc && (rc != -EOPNOTSUPP)) {
2244                        netif_err(adapter, ifup, adapter->netdev,
2245                                  "Failed to init RSS rc: %d\n", rc);
2246                        return rc;
2247                }
2248        }
2249
2250        /* Set indirect table */
2251        rc = ena_com_indirect_table_set(ena_dev);
2252        if (unlikely(rc && rc != -EOPNOTSUPP))
2253                return rc;
2254
2255        /* Configure hash function (if supported) */
2256        rc = ena_com_set_hash_function(ena_dev);
2257        if (unlikely(rc && (rc != -EOPNOTSUPP)))
2258                return rc;
2259
2260        /* Configure hash inputs (if supported) */
2261        rc = ena_com_set_hash_ctrl(ena_dev);
2262        if (unlikely(rc && (rc != -EOPNOTSUPP)))
2263                return rc;
2264
2265        return 0;
2266}
2267
2268static int ena_up_complete(struct ena_adapter *adapter)
2269{
2270        int rc;
2271
2272        rc = ena_rss_configure(adapter);
2273        if (rc)
2274                return rc;
2275
2276        ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
2277
2278        ena_refill_all_rx_bufs(adapter);
2279
2280        /* enable transmits */
2281        netif_tx_start_all_queues(adapter->netdev);
2282
2283        ena_napi_enable_in_range(adapter,
2284                                 0,
2285                                 adapter->xdp_num_queues + adapter->num_io_queues);
2286
2287        return 0;
2288}
2289
2290static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
2291{
2292        struct ena_com_create_io_ctx ctx;
2293        struct ena_com_dev *ena_dev;
2294        struct ena_ring *tx_ring;
2295        u32 msix_vector;
2296        u16 ena_qid;
2297        int rc;
2298
2299        ena_dev = adapter->ena_dev;
2300
2301        tx_ring = &adapter->tx_ring[qid];
2302        msix_vector = ENA_IO_IRQ_IDX(qid);
2303        ena_qid = ENA_IO_TXQ_IDX(qid);
2304
2305        memset(&ctx, 0x0, sizeof(ctx));
2306
2307        ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
2308        ctx.qid = ena_qid;
2309        ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
2310        ctx.msix_vector = msix_vector;
2311        ctx.queue_size = tx_ring->ring_size;
2312        ctx.numa_node = cpu_to_node(tx_ring->cpu);
2313
2314        rc = ena_com_create_io_queue(ena_dev, &ctx);
2315        if (rc) {
2316                netif_err(adapter, ifup, adapter->netdev,
2317                          "Failed to create I/O TX queue num %d rc: %d\n",
2318                          qid, rc);
2319                return rc;
2320        }
2321
2322        rc = ena_com_get_io_handlers(ena_dev, ena_qid,
2323                                     &tx_ring->ena_com_io_sq,
2324                                     &tx_ring->ena_com_io_cq);
2325        if (rc) {
2326                netif_err(adapter, ifup, adapter->netdev,
2327                          "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
2328                          qid, rc);
2329                ena_com_destroy_io_queue(ena_dev, ena_qid);
2330                return rc;
2331        }
2332
2333        ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
2334        return rc;
2335}
2336
2337static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
2338                                            int first_index, int count)
2339{
2340        struct ena_com_dev *ena_dev = adapter->ena_dev;
2341        int rc, i;
2342
2343        for (i = first_index; i < first_index + count; i++) {
2344                rc = ena_create_io_tx_queue(adapter, i);
2345                if (rc)
2346                        goto create_err;
2347        }
2348
2349        return 0;
2350
2351create_err:
2352        while (i-- > first_index)
2353                ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
2354
2355        return rc;
2356}
2357
2358static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
2359{
2360        struct ena_com_dev *ena_dev;
2361        struct ena_com_create_io_ctx ctx;
2362        struct ena_ring *rx_ring;
2363        u32 msix_vector;
2364        u16 ena_qid;
2365        int rc;
2366
2367        ena_dev = adapter->ena_dev;
2368
2369        rx_ring = &adapter->rx_ring[qid];
2370        msix_vector = ENA_IO_IRQ_IDX(qid);
2371        ena_qid = ENA_IO_RXQ_IDX(qid);
2372
2373        memset(&ctx, 0x0, sizeof(ctx));
2374
2375        ctx.qid = ena_qid;
2376        ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
2377        ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2378        ctx.msix_vector = msix_vector;
2379        ctx.queue_size = rx_ring->ring_size;
2380        ctx.numa_node = cpu_to_node(rx_ring->cpu);
2381
2382        rc = ena_com_create_io_queue(ena_dev, &ctx);
2383        if (rc) {
2384                netif_err(adapter, ifup, adapter->netdev,
2385                          "Failed to create I/O RX queue num %d rc: %d\n",
2386                          qid, rc);
2387                return rc;
2388        }
2389
2390        rc = ena_com_get_io_handlers(ena_dev, ena_qid,
2391                                     &rx_ring->ena_com_io_sq,
2392                                     &rx_ring->ena_com_io_cq);
2393        if (rc) {
2394                netif_err(adapter, ifup, adapter->netdev,
2395                          "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
2396                          qid, rc);
2397                goto err;
2398        }
2399
2400        ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
2401
2402        return rc;
2403err:
2404        ena_com_destroy_io_queue(ena_dev, ena_qid);
2405        return rc;
2406}
2407
2408static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
2409{
2410        struct ena_com_dev *ena_dev = adapter->ena_dev;
2411        int rc, i;
2412
2413        for (i = 0; i < adapter->num_io_queues; i++) {
2414                rc = ena_create_io_rx_queue(adapter, i);
2415                if (rc)
2416                        goto create_err;
2417                INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
2418        }
2419
2420        return 0;
2421
2422create_err:
2423        while (i--) {
2424                cancel_work_sync(&adapter->ena_napi[i].dim.work);
2425                ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
2426        }
2427
2428        return rc;
2429}
2430
2431static void set_io_rings_size(struct ena_adapter *adapter,
2432                              int new_tx_size,
2433                              int new_rx_size)
2434{
2435        int i;
2436
2437        for (i = 0; i < adapter->num_io_queues; i++) {
2438                adapter->tx_ring[i].ring_size = new_tx_size;
2439                adapter->rx_ring[i].ring_size = new_rx_size;
2440        }
2441}
2442
2443/* This function allows queue allocation to back off when the system is
2444 * low on memory. If there is not enough memory to allocate I/O queues,
2445 * the driver will try to allocate smaller queues.
2446 *
2447 * The backoff algorithm is as follows:
2448 *  1. Try to allocate the TX and RX queues.
2449 *  1.1. If successful, return success.
2450 *
2451 *  2. Divide the size of the larger of the RX and TX queues by 2
2452 *     (or both if they are the same size).
2453 *
2454 *  3. If TX or RX is now smaller than 256, return failure.
2455 *  4. Otherwise, go back to step 1.
2456 * (See the illustrative sketch after this function.)
2457 */
2458static int create_queues_with_size_backoff(struct ena_adapter *adapter)
2459{
2460        int rc, cur_rx_ring_size, cur_tx_ring_size;
2461        int new_rx_ring_size, new_tx_ring_size;
2462
2463        /* current queue sizes might be set to smaller than the requested
2464         * ones due to past queue allocation failures.
2465         */
2466        set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2467                          adapter->requested_rx_ring_size);
2468
2469        while (1) {
2470                if (ena_xdp_present(adapter)) {
2471                        rc = ena_setup_and_create_all_xdp_queues(adapter);
2472
2473                        if (rc)
2474                                goto err_setup_tx;
2475                }
2476                rc = ena_setup_tx_resources_in_range(adapter,
2477                                                     0,
2478                                                     adapter->num_io_queues);
2479                if (rc)
2480                        goto err_setup_tx;
2481
2482                rc = ena_create_io_tx_queues_in_range(adapter,
2483                                                      0,
2484                                                      adapter->num_io_queues);
2485                if (rc)
2486                        goto err_create_tx_queues;
2487
2488                rc = ena_setup_all_rx_resources(adapter);
2489                if (rc)
2490                        goto err_setup_rx;
2491
2492                rc = ena_create_all_io_rx_queues(adapter);
2493                if (rc)
2494                        goto err_create_rx_queues;
2495
2496                return 0;
2497
2498err_create_rx_queues:
2499                ena_free_all_io_rx_resources(adapter);
2500err_setup_rx:
2501                ena_destroy_all_tx_queues(adapter);
2502err_create_tx_queues:
2503                ena_free_all_io_tx_resources(adapter);
2504err_setup_tx:
2505                if (rc != -ENOMEM) {
2506                        netif_err(adapter, ifup, adapter->netdev,
2507                                  "Queue creation failed with error code %d\n",
2508                                  rc);
2509                        return rc;
2510                }
2511
2512                cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2513                cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2514
2515                netif_err(adapter, ifup, adapter->netdev,
2516                          "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2517                          cur_tx_ring_size, cur_rx_ring_size);
2518
2519                new_tx_ring_size = cur_tx_ring_size;
2520                new_rx_ring_size = cur_rx_ring_size;
2521
2522                /* Decrease the size of the larger queue, or
2523                 * decrease both if they are the same size.
2524                 */
2525                if (cur_rx_ring_size <= cur_tx_ring_size)
2526                        new_tx_ring_size = cur_tx_ring_size / 2;
2527                if (cur_rx_ring_size >= cur_tx_ring_size)
2528                        new_rx_ring_size = cur_rx_ring_size / 2;
2529
2530                if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2531                    new_rx_ring_size < ENA_MIN_RING_SIZE) {
2532                        netif_err(adapter, ifup, adapter->netdev,
2533                                  "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
2534                                  ENA_MIN_RING_SIZE);
2535                        return rc;
2536                }
2537
2538                netif_err(adapter, ifup, adapter->netdev,
2539                          "Retrying queue creation with sizes TX=%d, RX=%d\n",
2540                          new_tx_ring_size,
2541                          new_rx_ring_size);
2542
2543                set_io_rings_size(adapter, new_tx_ring_size,
2544                                  new_rx_ring_size);
2545        }
2546}
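
/* Editorial sketch, not part of the upstream driver: one step of the size
 * backoff implemented above, as a standalone helper.  The larger ring is
 * halved (both when they are equal) and the retry is abandoned once either
 * ring would drop below the minimum.  The 256 minimum mirrors
 * ENA_MIN_RING_SIZE as described in the comment above the function.
 */
static bool sketch_backoff_step(int *tx_size, int *rx_size)
{
        int min_ring_size = 256;        /* assumed ENA_MIN_RING_SIZE */
        int new_tx = *tx_size, new_rx = *rx_size;

        if (*rx_size <= *tx_size)
                new_tx = *tx_size / 2;
        if (*rx_size >= *tx_size)
                new_rx = *rx_size / 2;

        if (new_tx < min_ring_size || new_rx < min_ring_size)
                return false;           /* give up, keep current sizes */

        *tx_size = new_tx;
        *rx_size = new_rx;
        return true;                    /* retry queue creation */
}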
2547
2548static int ena_up(struct ena_adapter *adapter)
2549{
2550        int io_queue_count, rc, i;
2551
2552        netdev_dbg(adapter->netdev, "%s\n", __func__);
2553
2554        io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2555        ena_setup_io_intr(adapter);
2556
2557        /* napi poll functions should be initialized before running
2558         * request_irq() to handle a rare condition where a pending
2559         * interrupt causes the ISR to fire immediately while the poll
2560         * function has not been set yet, leading to a NULL dereference.
2561         */
2562        ena_init_napi_in_range(adapter, 0, io_queue_count);
2563
2564        rc = ena_request_io_irq(adapter);
2565        if (rc)
2566                goto err_req_irq;
2567
2568        rc = create_queues_with_size_backoff(adapter);
2569        if (rc)
2570                goto err_create_queues_with_backoff;
2571
2572        rc = ena_up_complete(adapter);
2573        if (rc)
2574                goto err_up;
2575
2576        if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
2577                netif_carrier_on(adapter->netdev);
2578
2579        u64_stats_update_begin(&adapter->syncp);
2580        adapter->dev_stats.interface_up++;
2581        u64_stats_update_end(&adapter->syncp);
2582
2583        set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2584
2585        /* Enable completion queues interrupt */
2586        for (i = 0; i < adapter->num_io_queues; i++)
2587                ena_unmask_interrupt(&adapter->tx_ring[i],
2588                                     &adapter->rx_ring[i]);
2589
2590        /* schedule napi in case we had pending packets
2591         * from the last time we disabled napi
2592         */
2593        for (i = 0; i < io_queue_count; i++)
2594                napi_schedule(&adapter->ena_napi[i].napi);
2595
2596        return rc;
2597
2598err_up:
2599        ena_destroy_all_tx_queues(adapter);
2600        ena_free_all_io_tx_resources(adapter);
2601        ena_destroy_all_rx_queues(adapter);
2602        ena_free_all_io_rx_resources(adapter);
2603err_create_queues_with_backoff:
2604        ena_free_io_irq(adapter);
2605err_req_irq:
2606        ena_del_napi_in_range(adapter, 0, io_queue_count);
2607
2608        return rc;
2609}
2610
2611static void ena_down(struct ena_adapter *adapter)
2612{
2613        int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2614
2615        netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
2616
2617        clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2618
2619        u64_stats_update_begin(&adapter->syncp);
2620        adapter->dev_stats.interface_down++;
2621        u64_stats_update_end(&adapter->syncp);
2622
2623        netif_carrier_off(adapter->netdev);
2624        netif_tx_disable(adapter->netdev);
2625
2626        /* After this point the napi handler won't enable the tx queue */
2627        ena_napi_disable_in_range(adapter, 0, io_queue_count);
2628
2629        /* After destroying the queues there won't be any new interrupts */
2630
2631        if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
2632                int rc;
2633
2634                rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2635                if (rc)
2636                        dev_err(&adapter->pdev->dev, "Device reset failed\n");
2637                /* stop submitting admin commands on a device that was reset */
2638                ena_com_set_admin_running_state(adapter->ena_dev, false);
2639        }
2640
2641        ena_destroy_all_io_queues(adapter);
2642
2643        ena_disable_io_intr_sync(adapter);
2644        ena_free_io_irq(adapter);
2645        ena_del_napi_in_range(adapter, 0, io_queue_count);
2646
2647        ena_free_all_tx_bufs(adapter);
2648        ena_free_all_rx_bufs(adapter);
2649        ena_free_all_io_tx_resources(adapter);
2650        ena_free_all_io_rx_resources(adapter);
2651}
2652
2653/* ena_open - Called when a network interface is made active
2654 * @netdev: network interface device structure
2655 *
2656 * Returns 0 on success, negative value on failure
2657 *
2658 * The open entry point is called when a network interface is made
2659 * active by the system (IFF_UP).  At this point all resources needed
2660 * for transmit and receive operations are allocated, the interrupt
2661 * handler is registered with the OS, the watchdog timer is started,
2662 * and the stack is notified that the interface is ready.
2663 */
2664static int ena_open(struct net_device *netdev)
2665{
2666        struct ena_adapter *adapter = netdev_priv(netdev);
2667        int rc;
2668
2669        /* Notify the stack of the actual queue counts. */
2670        rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
2671        if (rc) {
2672                netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
2673                return rc;
2674        }
2675
2676        rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
2677        if (rc) {
2678                netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
2679                return rc;
2680        }
2681
2682        rc = ena_up(adapter);
2683        if (rc)
2684                return rc;
2685
2686        return rc;
2687}
2688
2689/* ena_close - Disables a network interface
2690 * @netdev: network interface device structure
2691 *
2692 * Returns 0, this is not allowed to fail
2693 *
2694 * The close entry point is called when an interface is de-activated
2695 * by the OS.  The hardware is still under the drivers control, but
2696 * needs to be disabled.  A global MAC reset is issued to stop the
2697 * hardware, and all transmit and receive resources are freed.
2698 */
2699static int ena_close(struct net_device *netdev)
2700{
2701        struct ena_adapter *adapter = netdev_priv(netdev);
2702
2703        netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
2704
2705        if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
2706                return 0;
2707
2708        if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2709                ena_down(adapter);
2710
2711        /* Check for device status and issue reset if needed */
2712        check_for_admin_com_state(adapter);
2713        if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2714                netif_err(adapter, ifdown, adapter->netdev,
2715                          "Destroy failure, restarting device\n");
2716                ena_dump_stats_to_dmesg(adapter);
2717                /* rtnl lock already obtained in dev_ioctl() layer */
2718                ena_destroy_device(adapter, false);
2719                ena_restore_device(adapter);
2720        }
2721
2722        return 0;
2723}
2724
2725int ena_update_queue_sizes(struct ena_adapter *adapter,
2726                           u32 new_tx_size,
2727                           u32 new_rx_size)
2728{
2729        bool dev_was_up;
2730
2731        dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2732        ena_close(adapter->netdev);
2733        adapter->requested_tx_ring_size = new_tx_size;
2734        adapter->requested_rx_ring_size = new_rx_size;
2735        ena_init_io_rings(adapter,
2736                          0,
2737                          adapter->xdp_num_queues +
2738                          adapter->num_io_queues);
2739        return dev_was_up ? ena_up(adapter) : 0;
2740}
2741
2742int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
2743{
2744        struct ena_com_dev *ena_dev = adapter->ena_dev;
2745        int prev_channel_count;
2746        bool dev_was_up;
2747
2748        dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2749        ena_close(adapter->netdev);
2750        prev_channel_count = adapter->num_io_queues;
2751        adapter->num_io_queues = new_channel_count;
2752        if (ena_xdp_present(adapter) &&
2753            ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
2754                adapter->xdp_first_ring = new_channel_count;
2755                adapter->xdp_num_queues = new_channel_count;
2756                if (prev_channel_count > new_channel_count)
2757                        ena_xdp_exchange_program_rx_in_range(adapter,
2758                                                             NULL,
2759                                                             new_channel_count,
2760                                                             prev_channel_count);
2761                else
2762                        ena_xdp_exchange_program_rx_in_range(adapter,
2763                                                             adapter->xdp_bpf_prog,
2764                                                             prev_channel_count,
2765                                                             new_channel_count);
2766        }
2767
2768        /* We need to destroy the rss table so that the indirection
2769         * table will be reinitialized by ena_up()
2770         */
2771        ena_com_rss_destroy(ena_dev);
2772        ena_init_io_rings(adapter,
2773                          0,
2774                          adapter->xdp_num_queues +
2775                          adapter->num_io_queues);
2776        return dev_was_up ? ena_open(adapter->netdev) : 0;
2777}
2778
2779static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
2780{
2781        u32 mss = skb_shinfo(skb)->gso_size;
2782        struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
2783        u8 l4_protocol = 0;
2784
2785        if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
2786                ena_tx_ctx->l4_csum_enable = 1;
2787                if (mss) {
2788                        ena_tx_ctx->tso_enable = 1;
2789                        ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
2790                        ena_tx_ctx->l4_csum_partial = 0;
2791                } else {
2792                        ena_tx_ctx->tso_enable = 0;
2793                        ena_meta->l4_hdr_len = 0;
2794                        ena_tx_ctx->l4_csum_partial = 1;
2795                }
2796
2797                switch (ip_hdr(skb)->version) {
2798                case IPVERSION:
2799                        ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2800                        if (ip_hdr(skb)->frag_off & htons(IP_DF))
2801                                ena_tx_ctx->df = 1;
2802                        if (mss)
2803                                ena_tx_ctx->l3_csum_enable = 1;
2804                        l4_protocol = ip_hdr(skb)->protocol;
2805                        break;
2806                case 6:
2807                        ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2808                        l4_protocol = ipv6_hdr(skb)->nexthdr;
2809                        break;
2810                default:
2811                        break;
2812                }
2813
2814                if (l4_protocol == IPPROTO_TCP)
2815                        ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2816                else
2817                        ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2818
2819                ena_meta->mss = mss;
2820                ena_meta->l3_hdr_len = skb_network_header_len(skb);
2821                ena_meta->l3_hdr_offset = skb_network_offset(skb);
2822                ena_tx_ctx->meta_valid = 1;
2823
2824        } else {
2825                ena_tx_ctx->meta_valid = 0;
2826        }
2827}
2828
2829static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
2830                                       struct sk_buff *skb)
2831{
2832        int num_frags, header_len, rc;
2833
2834        num_frags = skb_shinfo(skb)->nr_frags;
2835        header_len = skb_headlen(skb);
2836
2837        if (num_frags < tx_ring->sgl_size)
2838                return 0;
2839
2840        if ((num_frags == tx_ring->sgl_size) &&
2841            (header_len < tx_ring->tx_max_header_size))
2842                return 0;
2843
2844        u64_stats_update_begin(&tx_ring->syncp);
2845        tx_ring->tx_stats.linearize++;
2846        u64_stats_update_end(&tx_ring->syncp);
2847
2848        rc = skb_linearize(skb);
2849        if (unlikely(rc)) {
2850                u64_stats_update_begin(&tx_ring->syncp);
2851                tx_ring->tx_stats.linearize_failed++;
2852                u64_stats_update_end(&tx_ring->syncp);
2853        }
2854
2855        return rc;
2856}
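
/* Editorial sketch, not part of the upstream driver: the linearization
 * predicate from ena_check_and_linearize_skb() in isolation.  An skb must
 * be linearized when its fragment count exceeds the ring's SGL size, or
 * when it exactly fills the SGL and the linear header part is too large to
 * be pushed separately.
 */
static bool sketch_needs_linearize(int num_frags, int header_len,
                                   int sgl_size, int max_header_size)
{
        if (num_frags < sgl_size)
                return false;
        if (num_frags == sgl_size && header_len < max_header_size)
                return false;
        return true;
}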
2857
2858static int ena_tx_map_skb(struct ena_ring *tx_ring,
2859                          struct ena_tx_buffer *tx_info,
2860                          struct sk_buff *skb,
2861                          void **push_hdr,
2862                          u16 *header_len)
2863{
2864        struct ena_adapter *adapter = tx_ring->adapter;
2865        struct ena_com_buf *ena_buf;
2866        dma_addr_t dma;
2867        u32 skb_head_len, frag_len, last_frag;
2868        u16 push_len = 0;
2869        u16 delta = 0;
2870        int i = 0;
2871
2872        skb_head_len = skb_headlen(skb);
2873        tx_info->skb = skb;
2874        ena_buf = tx_info->bufs;
2875
2876        if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2877                /* When the device is in LLQ mode, the driver copies
2878                 * the header into the device memory space.
2879                 * The ena_com layer assumes the header is in a linear
2880                 * memory space.
2881                 * This assumption might be wrong since part of the header
2882                 * can be in the fragmented buffers.
2883                 * Use skb_header_pointer() to make sure the header is in a
2884                 * linear memory space.
2885                 */
2886
2887                push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
2888                *push_hdr = skb_header_pointer(skb, 0, push_len,
2889                                               tx_ring->push_buf_intermediate_buf);
2890                *header_len = push_len;
2891                if (unlikely(skb->data != *push_hdr)) {
2892                        u64_stats_update_begin(&tx_ring->syncp);
2893                        tx_ring->tx_stats.llq_buffer_copy++;
2894                        u64_stats_update_end(&tx_ring->syncp);
2895
2896                        delta = push_len - skb_head_len;
2897                }
2898        } else {
2899                *push_hdr = NULL;
2900                *header_len = min_t(u32, skb_head_len,
2901                                    tx_ring->tx_max_header_size);
2902        }
2903
2904        netif_dbg(adapter, tx_queued, adapter->netdev,
2905                  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2906                  *push_hdr, push_len);
2907
2908        if (skb_head_len > push_len) {
2909                dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2910                                     skb_head_len - push_len, DMA_TO_DEVICE);
2911                if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2912                        goto error_report_dma_error;
2913
2914                ena_buf->paddr = dma;
2915                ena_buf->len = skb_head_len - push_len;
2916
2917                ena_buf++;
2918                tx_info->num_of_bufs++;
2919                tx_info->map_linear_data = 1;
2920        } else {
2921                tx_info->map_linear_data = 0;
2922        }
2923
2924        last_frag = skb_shinfo(skb)->nr_frags;
2925
2926        for (i = 0; i < last_frag; i++) {
2927                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2928
2929                frag_len = skb_frag_size(frag);
2930
2931                if (unlikely(delta >= frag_len)) {
2932                        delta -= frag_len;
2933                        continue;
2934                }
2935
2936                dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
2937                                       frag_len - delta, DMA_TO_DEVICE);
2938                if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2939                        goto error_report_dma_error;
2940
2941                ena_buf->paddr = dma;
2942                ena_buf->len = frag_len - delta;
2943                ena_buf++;
2944                tx_info->num_of_bufs++;
2945                delta = 0;
2946        }
2947
2948        return 0;
2949
2950error_report_dma_error:
2951        u64_stats_update_begin(&tx_ring->syncp);
2952        tx_ring->tx_stats.dma_mapping_err++;
2953        u64_stats_update_end(&tx_ring->syncp);
2954        netdev_warn(adapter->netdev, "failed to map skb\n");
2955
2956        tx_info->skb = NULL;
2957
2958        tx_info->num_of_bufs += i;
2959        ena_unmap_tx_buff(tx_ring, tx_info);
2960
2961        return -EINVAL;
2962}
2963
2964/* Called with netif_tx_lock. */
2965static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
2966{
2967        struct ena_adapter *adapter = netdev_priv(dev);
2968        struct ena_tx_buffer *tx_info;
2969        struct ena_com_tx_ctx ena_tx_ctx;
2970        struct ena_ring *tx_ring;
2971        struct netdev_queue *txq;
2972        void *push_hdr;
2973        u16 next_to_use, req_id, header_len;
2974        int qid, rc;
2975
2976        netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
2977        /* Determine which Tx ring the skb will be placed on */
2978        qid = skb_get_queue_mapping(skb);
2979        tx_ring = &adapter->tx_ring[qid];
2980        txq = netdev_get_tx_queue(dev, qid);
2981
2982        rc = ena_check_and_linearize_skb(tx_ring, skb);
2983        if (unlikely(rc))
2984                goto error_drop_packet;
2985
2986        skb_tx_timestamp(skb);
2987
2988        next_to_use = tx_ring->next_to_use;
2989        req_id = tx_ring->free_ids[next_to_use];
2990        tx_info = &tx_ring->tx_buffer_info[req_id];
2991        tx_info->num_of_bufs = 0;
2992
2993        WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
2994
2995        rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
2996        if (unlikely(rc))
2997                goto error_drop_packet;
2998
2999        memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
3000        ena_tx_ctx.ena_bufs = tx_info->bufs;
3001        ena_tx_ctx.push_header = push_hdr;
3002        ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
3003        ena_tx_ctx.req_id = req_id;
3004        ena_tx_ctx.header_len = header_len;
3005
3006        /* set flags and meta data */
3007        ena_tx_csum(&ena_tx_ctx, skb);
3008
3009        rc = ena_xmit_common(dev,
3010                             tx_ring,
3011                             tx_info,
3012                             &ena_tx_ctx,
3013                             next_to_use,
3014                             skb->len);
3015        if (rc)
3016                goto error_unmap_dma;
3017
3018        netdev_tx_sent_queue(txq, skb->len);
3019
3020        /* Stop the queue when no more space is available; the next packet
3021         * can require up to sgl_size + 2 descriptors: one for the meta
3022         * descriptor and one for the header (if larger than tx_max_header_size).
3023         */
3024        if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3025                                                   tx_ring->sgl_size + 2))) {
3026                netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
3027                          __func__, qid);
3028
3029                netif_tx_stop_queue(txq);
3030                u64_stats_update_begin(&tx_ring->syncp);
3031                tx_ring->tx_stats.queue_stop++;
3032                u64_stats_update_end(&tx_ring->syncp);
3033
3034                /* There is a rare condition where this function decides to
3035                 * stop the queue but meanwhile clean_tx_irq updates
3036                 * next_to_completion and terminates.
3037                 * The queue would then remain stopped forever.
3038                 * To solve this issue add a memory barrier to make sure that
3039                 * the netif_tx_stop_queue() write is visible before checking
3040                 * whether there is additional space in the queue.
3041                 */
3042                smp_mb();
3043
3044                if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3045                                                 ENA_TX_WAKEUP_THRESH)) {
3046                        netif_tx_wake_queue(txq);
3047                        u64_stats_update_begin(&tx_ring->syncp);
3048                        tx_ring->tx_stats.queue_wakeup++;
3049                        u64_stats_update_end(&tx_ring->syncp);
3050                }
3051        }
3052
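            /* Doorbell writes are batched: when netdev_xmit_more() indicates
             * that more packets are queued behind this one, the doorbell is
             * deferred to the last skb of the burst, unless the queue was
             * just stopped above.
             */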
3053        if (netif_xmit_stopped(txq) || !netdev_xmit_more()) {
3054                /* trigger the dma engine. ena_com_write_sq_doorbell()
3055                 * has a mb
3056                 */
3057                ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
3058                u64_stats_update_begin(&tx_ring->syncp);
3059                tx_ring->tx_stats.doorbells++;
3060                u64_stats_update_end(&tx_ring->syncp);
3061        }
3062
3063        return NETDEV_TX_OK;
3064
3065error_unmap_dma:
3066        ena_unmap_tx_buff(tx_ring, tx_info);
3067        tx_info->skb = NULL;
3068
3069error_drop_packet:
3070        dev_kfree_skb(skb);
3071        return NETDEV_TX_OK;
3072}
3073
3074static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
3075                            struct net_device *sb_dev)
3076{
3077        u16 qid;
3078        /* We suspect that this is good for in-kernel network services that
3079         * want to loop an incoming skb's Rx queue back to Tx; with normal
3080         * user-generated traffic we will most probably not get here.
3081         */
3082        if (skb_rx_queue_recorded(skb))
3083                qid = skb_get_rx_queue(skb);
3084        else
3085                qid = netdev_pick_tx(dev, skb, NULL);
3086
3087        return qid;
3088}
3089
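    /* Report host information (OS type, kernel version, driver version, CPU
     * count and supported driver features) to the device through the
     * host-info admin attribute.
     */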
3090static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
3091{
3092        struct ena_admin_host_info *host_info;
3093        int rc;
3094
3095        /* Allocate only the host info */
3096        rc = ena_com_allocate_host_info(ena_dev);
3097        if (rc) {
3098                pr_err("Cannot allocate host info\n");
3099                return;
3100        }
3101
3102        host_info = ena_dev->host_attr.host_info;
3103
3104        host_info->bdf = (pdev->bus->number << 8) | pdev->devfn;
3105        host_info->os_type = ENA_ADMIN_OS_LINUX;
3106        host_info->kernel_ver = LINUX_VERSION_CODE;
3107        strlcpy(host_info->kernel_ver_str, utsname()->version,
3108                sizeof(host_info->kernel_ver_str) - 1);
3109        host_info->os_dist = 0;
3110        strncpy(host_info->os_dist_str, utsname()->release,
3111                sizeof(host_info->os_dist_str) - 1);
3112        host_info->driver_version =
3113                (DRV_MODULE_GEN_MAJOR) |
3114                (DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
3115                (DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
3116                ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
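            /* The driver version is packed into a single 32-bit word: the
             * major number in the low bits, minor and sub-minor at their
             * *_SHIFT offsets, and the module-type character 'K' (presumably
             * marking an in-kernel build) at its shift. For example, a
             * hypothetical generation 2.2.0 would be encoded as
             * 2 | (2 << MINOR_SHIFT) | (0 << SUB_MINOR_SHIFT) | ('K' << MODULE_TYPE_SHIFT).
             */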
3117        host_info->num_cpus = num_online_cpus();
3118
3119        host_info->driver_supported_features =
3120                ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
3121                ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK;
3122
3123        rc = ena_com_set_host_attributes(ena_dev);
3124        if (rc) {
3125                if (rc == -EOPNOTSUPP)
3126                        pr_warn("Cannot set host attributes\n");
3127                else
3128                        pr_err("Cannot set host attributes\n");
3129
3130                goto err;
3131        }
3132
3133        return;
3134
3135err:
3136        ena_com_delete_host_info(ena_dev);
3137}
3138
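    /* Allocate the debug area the driver periodically fills with its ethtool
     * statistics (see ena_dump_stats_to_buf() in the timer service). The area
     * holds ETH_GSTRING_LEN bytes per stat string plus a u64 per value, and
     * is registered with the device via the host attributes.
     */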
3139static void ena_config_debug_area(struct ena_adapter *adapter)
3140{
3141        u32 debug_area_size;
3142        int rc, ss_count;
3143
3144        ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
3145        if (ss_count <= 0) {
3146                netif_err(adapter, drv, adapter->netdev,
3147                          "SS count is negative\n");
3148                return;
3149        }
3150
3151        /* Allocate 32 bytes for each string and 64 bits for each value */
3152        debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
3153
3154        rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
3155        if (rc) {
3156                pr_err("Cannot allocate debug area\n");
3157                return;
3158        }
3159
3160        rc = ena_com_set_host_attributes(adapter->ena_dev);
3161        if (rc) {
3162                if (rc == -EOPNOTSUPP)
3163                        netif_warn(adapter, drv, adapter->netdev,
3164                                   "Cannot set host attributes\n");
3165                else
3166                        netif_err(adapter, drv, adapter->netdev,
3167                                  "Cannot set host attributes\n");
3168                goto err;
3169        }
3170
3171        return;
3172err:
3173        ena_com_delete_debug_area(adapter->ena_dev);
3174}
3175
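    /* ndo_get_stats64 handler: sum the per-ring packet/byte counters inside
     * u64_stats retry loops, so the 64-bit counters are read consistently
     * even on 32-bit machines, then add the adapter-wide rx/tx drop counters.
     */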
3176static void ena_get_stats64(struct net_device *netdev,
3177                            struct rtnl_link_stats64 *stats)
3178{
3179        struct ena_adapter *adapter = netdev_priv(netdev);
3180        struct ena_ring *rx_ring, *tx_ring;
3181        unsigned int start;
3182        u64 rx_drops;
3183        u64 tx_drops;
3184        int i;
3185
3186        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3187                return;
3188
3189        for (i = 0; i < adapter->num_io_queues; i++) {
3190                u64 bytes, packets;
3191
3192                tx_ring = &adapter->tx_ring[i];
3193
3194                do {
3195                        start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
3196                        packets = tx_ring->tx_stats.cnt;
3197                        bytes = tx_ring->tx_stats.bytes;
3198                } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
3199
3200                stats->tx_packets += packets;
3201                stats->tx_bytes += bytes;
3202
3203                rx_ring = &adapter->rx_ring[i];
3204
3205                do {
3206                        start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
3207                        packets = rx_ring->rx_stats.cnt;
3208                        bytes = rx_ring->rx_stats.bytes;
3209                } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
3210
3211                stats->rx_packets += packets;
3212                stats->rx_bytes += bytes;
3213        }
3214
3215        do {
3216                start = u64_stats_fetch_begin_irq(&adapter->syncp);
3217                rx_drops = adapter->dev_stats.rx_drops;
3218                tx_drops = adapter->dev_stats.tx_drops;
3219        } while (u64_stats_fetch_retry_irq(&adapter->syncp, start));
3220
3221        stats->rx_dropped = rx_drops;
3222        stats->tx_dropped = tx_drops;
3223
3224        stats->multicast = 0;
3225        stats->collisions = 0;
3226
3227        stats->rx_length_errors = 0;
3228        stats->rx_crc_errors = 0;
3229        stats->rx_frame_errors = 0;
3230        stats->rx_fifo_errors = 0;
3231        stats->rx_missed_errors = 0;
3232        stats->tx_window_errors = 0;
3233
3234        stats->rx_errors = 0;
3235        stats->tx_errors = 0;
3236}
3237
3238static const struct net_device_ops ena_netdev_ops = {
3239        .ndo_open               = ena_open,
3240        .ndo_stop               = ena_close,
3241        .ndo_start_xmit         = ena_start_xmit,
3242        .ndo_select_queue       = ena_select_queue,
3243        .ndo_get_stats64        = ena_get_stats64,
3244        .ndo_tx_timeout         = ena_tx_timeout,
3245        .ndo_change_mtu         = ena_change_mtu,
3246        .ndo_set_mac_address    = NULL,
3247        .ndo_validate_addr      = eth_validate_addr,
3248        .ndo_bpf                = ena_xdp,
3249};
3250
3251static int ena_device_validate_params(struct ena_adapter *adapter,
3252                                      struct ena_com_dev_get_features_ctx *get_feat_ctx)
3253{
3254        struct net_device *netdev = adapter->netdev;
3255        int rc;
3256
3257        rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
3258                              adapter->mac_addr);
3259        if (!rc) {
3260                netif_err(adapter, drv, netdev,
3261                          "Error, mac address are different\n");
3262                return -EINVAL;
3263        }
3264
3265        if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
3266                netif_err(adapter, drv, netdev,
3267                          "Error, device max mtu is smaller than netdev MTU\n");
3268                return -EINVAL;
3269        }
3270
3271        return 0;
3272}
3273
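    /* Admin-level device initialization, shared by probe and reset recovery:
     * set up mmio read-less access, reset the device, validate the version
     * and DMA width, bring up the admin queue (in polling mode until MSI-X is
     * configured), publish the host info and configure the AENQ groups.
     */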
3274static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
3275                           struct ena_com_dev_get_features_ctx *get_feat_ctx,
3276                           bool *wd_state)
3277{
3278        struct device *dev = &pdev->dev;
3279        bool readless_supported;
3280        u32 aenq_groups;
3281        int dma_width;
3282        int rc;
3283
3284        rc = ena_com_mmio_reg_read_request_init(ena_dev);
3285        if (rc) {
3286                dev_err(dev, "failed to init mmio read less\n");
3287                return rc;
3288        }
3289
3290        /* The PCIe configuration space revision id indicates whether mmio
3291         * register read is disabled
3292         */
3293        readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
3294        ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3295
3296        rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3297        if (rc) {
3298                dev_err(dev, "Can not reset device\n");
3299                goto err_mmio_read_less;
3300        }
3301
3302        rc = ena_com_validate_version(ena_dev);
3303        if (rc) {
3304                dev_err(dev, "device version is too low\n");
3305                goto err_mmio_read_less;
3306        }
3307
3308        dma_width = ena_com_get_dma_width(ena_dev);
3309        if (dma_width < 0) {
3310                dev_err(dev, "Invalid dma width value %d", dma_width);
3311                rc = dma_width;
3312                goto err_mmio_read_less;
3313        }
3314
3315        rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
3316        if (rc) {
3317                dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc);
3318                goto err_mmio_read_less;
3319        }
3320
3321        rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
3322        if (rc) {
3323                dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n",
3324                        rc);
3325                goto err_mmio_read_less;
3326        }
3327
3328        /* ENA admin level init */
3329        rc = ena_com_admin_init(ena_dev, &aenq_handlers);
3330        if (rc) {
3331                dev_err(dev,
3332                        "Can not initialize ena admin queue with device\n");
3333                goto err_mmio_read_less;
3334        }
3335
3336        /* To enable the msix interrupts the driver needs to know the number
3337         * of queues. So the driver uses polling mode to retrieve this
3338         * information
3339         */
3340        ena_com_set_admin_polling_mode(ena_dev, true);
3341
3342        ena_config_host_info(ena_dev, pdev);
3343
3344        /* Get Device Attributes */
3345        rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3346        if (rc) {
3347                dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
3348                goto err_admin_init;
3349        }
3350
3351        /* Try to turn on all the available aenq groups */
3352        aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
3353                BIT(ENA_ADMIN_FATAL_ERROR) |
3354                BIT(ENA_ADMIN_WARNING) |
3355                BIT(ENA_ADMIN_NOTIFICATION) |
3356                BIT(ENA_ADMIN_KEEP_ALIVE);
3357
3358        aenq_groups &= get_feat_ctx->aenq.supported_groups;
3359
3360        rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3361        if (rc) {
3362                dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
3363                goto err_admin_init;
3364        }
3365
3366        *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3367
3368        return 0;
3369
3370err_admin_init:
3371        ena_com_delete_host_info(ena_dev);
3372        ena_com_admin_destroy(ena_dev);
3373err_mmio_read_less:
3374        ena_com_mmio_reg_read_request_destroy(ena_dev);
3375
3376        return rc;
3377}
3378
3379static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
3380{
3381        struct ena_com_dev *ena_dev = adapter->ena_dev;
3382        struct device *dev = &adapter->pdev->dev;
3383        int rc;
3384
3385        rc = ena_enable_msix(adapter);
3386        if (rc) {
3387                dev_err(dev, "Can not reserve msix vectors\n");
3388                return rc;
3389        }
3390
3391        ena_setup_mgmnt_intr(adapter);
3392
3393        rc = ena_request_mgmnt_irq(adapter);
3394        if (rc) {
3395                dev_err(dev, "Can not setup management interrupts\n");
3396                goto err_disable_msix;
3397        }
3398
3399        ena_com_set_admin_polling_mode(ena_dev, false);
3400
3401        ena_com_admin_aenq_enable(ena_dev);
3402
3403        return 0;
3404
3405err_disable_msix:
3406        ena_disable_msix(adapter);
3407
3408        return rc;
3409}
3410
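    /* Tear the device down: stop the timer service and the IO path, reset the
     * device (unless a triggered reset already did so via ena_down()), free
     * the management IRQ and MSI-X vectors and destroy the admin queue. With
     * graceful == false the admin queue is first marked as not running,
     * presumably so no further admin commands are issued to a failing device.
     */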
3411static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3412{
3413        struct net_device *netdev = adapter->netdev;
3414        struct ena_com_dev *ena_dev = adapter->ena_dev;
3415        bool dev_up;
3416
3417        if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
3418                return;
3419
3420        netif_carrier_off(netdev);
3421
3422        del_timer_sync(&adapter->timer_service);
3423
3424        dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
3425        adapter->dev_up_before_reset = dev_up;
3426        if (!graceful)
3427                ena_com_set_admin_running_state(ena_dev, false);
3428
3429        if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3430                ena_down(adapter);
3431
3432        /* Stop the device from sending AENQ events (if the reset flag is set
3433         * and the device is up, ena_down() has already reset the device).
3434         */
3435        if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
3436                ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3437
3438        ena_free_mgmnt_irq(adapter);
3439
3440        ena_disable_msix(adapter);
3441
3442        ena_com_abort_admin_commands(ena_dev);
3443
3444        ena_com_wait_for_abort_completion(ena_dev);
3445
3446        ena_com_admin_destroy(ena_dev);
3447
3448        ena_com_mmio_reg_read_request_destroy(ena_dev);
3449
3450        /* return reset reason to default value */
3451        adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3452
3453        clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3454        clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3455}
3456
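    /* Counterpart of ena_destroy_device(): re-run the admin-level init,
     * re-validate the MAC address and MTU against the device, re-enable MSI-X
     * and the admin interrupts and, if the interface was up before the reset,
     * recreate the IO queues and restore the carrier state.
     */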
3457static int ena_restore_device(struct ena_adapter *adapter)
3458{
3459        struct ena_com_dev_get_features_ctx get_feat_ctx;
3460        struct ena_com_dev *ena_dev = adapter->ena_dev;
3461        struct pci_dev *pdev = adapter->pdev;
3462        bool wd_state;
3463        int rc;
3464
3465        set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3466        rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
3467        if (rc) {
3468                dev_err(&pdev->dev, "Can not initialize device\n");
3469                goto err;
3470        }
3471        adapter->wd_state = wd_state;
3472
3473        rc = ena_device_validate_params(adapter, &get_feat_ctx);
3474        if (rc) {
3475                dev_err(&pdev->dev, "Validation of device parameters failed\n");
3476                goto err_device_destroy;
3477        }
3478
3479        rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3480        if (rc) {
3481                dev_err(&pdev->dev, "Enable MSI-X failed\n");
3482                goto err_device_destroy;
3483        }
3484        /* If the interface was up before the reset bring it up */
3485        if (adapter->dev_up_before_reset) {
3486                rc = ena_up(adapter);
3487                if (rc) {
3488                        dev_err(&pdev->dev, "Failed to create I/O queues\n");
3489                        goto err_disable_msix;
3490                }
3491        }
3492
3493        set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3494
3495        clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3496        if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
3497                netif_carrier_on(adapter->netdev);
3498
3499        mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3500        dev_err(&pdev->dev, "Device reset completed successfully\n");
3501        adapter->last_keep_alive_jiffies = jiffies;
3502
3503        return rc;
3504err_disable_msix:
3505        ena_free_mgmnt_irq(adapter);
3506        ena_disable_msix(adapter);
3507err_device_destroy:
3508        ena_com_abort_admin_commands(ena_dev);
3509        ena_com_wait_for_abort_completion(ena_dev);
3510        ena_com_admin_destroy(ena_dev);
3511        ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3512        ena_com_mmio_reg_read_request_destroy(ena_dev);
3513err:
3514        clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3515        clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3516        dev_err(&pdev->dev,
3517                "Reset attempt failed. Can not reset the device\n");
3518
3519        return rc;
3520}
3521
3522static void ena_fw_reset_device(struct work_struct *work)
3523{
3524        struct ena_adapter *adapter =
3525                container_of(work, struct ena_adapter, reset_task);
3526        struct pci_dev *pdev = adapter->pdev;
3527
3528        if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3529                dev_err(&pdev->dev,
3530                        "device reset scheduled while reset bit is off\n");
3531                return;
3532        }
3533        rtnl_lock();
3534        ena_destroy_device(adapter, false);
3535        ena_restore_device(adapter);
3536        rtnl_unlock();
3537}
3538
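    /* Watchdog helper: if an Rx queue has pending completions but has never
     * seen an interrupt, count the event; after ENA_MAX_NO_INTERRUPT_ITERATIONS
     * consecutive detections schedule a reset with reason MISS_INTERRUPT.
     */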
3539static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3540                                        struct ena_ring *rx_ring)
3541{
3542        if (likely(rx_ring->first_interrupt))
3543                return 0;
3544
3545        if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3546                return 0;
3547
3548        rx_ring->no_interrupt_event_cnt++;
3549
3550        if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3551                netif_err(adapter, rx_err, adapter->netdev,
3552                          "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
3553                          rx_ring->qid);
3554                adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
3555                smp_mb__before_atomic();
3556                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3557                return -EIO;
3558        }
3559
3560        return 0;
3561}
3562
3563static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3564                                          struct ena_ring *tx_ring)
3565{
3566        struct ena_tx_buffer *tx_buf;
3567        unsigned long last_jiffies;
3568        u32 missed_tx = 0;
3569        int i, rc = 0;
3570
3571        for (i = 0; i < tx_ring->ring_size; i++) {
3572                tx_buf = &tx_ring->tx_buffer_info[i];
3573                last_jiffies = tx_buf->last_jiffies;
3574
3575                if (last_jiffies == 0)
3576                        /* no pending Tx at this location */
3577                        continue;
3578
3579                if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies +
3580                             2 * adapter->missing_tx_completion_to))) {
3581                        /* If the interrupt is still not received after the
3582                         * grace period, schedule a reset
3583                         */
3584                        netif_err(adapter, tx_err, adapter->netdev,
3585                                  "Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
3586                                  tx_ring->qid);
3587                        adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
3588                        smp_mb__before_atomic();
3589                        set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3590                        return -EIO;
3591                }
3592
3593                if (unlikely(time_is_before_jiffies(last_jiffies +
3594                                adapter->missing_tx_completion_to))) {
3595                        if (!tx_buf->print_once)
3596                                netif_notice(adapter, tx_err, adapter->netdev,
3597                                             "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
3598                                             tx_ring->qid, i);
3599
3600                        tx_buf->print_once = 1;
3601                        missed_tx++;
3602                }
3603        }
3604
3605        if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
3606                netif_err(adapter, tx_err, adapter->netdev,
3607                          "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
3608                          missed_tx,
3609                          adapter->missing_tx_completion_threshold);
3610                adapter->reset_reason =
3611                        ENA_REGS_RESET_MISS_TX_CMPL;
3612                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3613                rc = -EIO;
3614        }
3615
3616        u64_stats_update_begin(&tx_ring->syncp);
3617        tx_ring->tx_stats.missed_tx = missed_tx;
3618        u64_stats_update_end(&tx_ring->syncp);
3619
3620        return rc;
3621}
3622
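    /* Called from the timer service: scan up to ENA_MONITORED_TX_QUEUES rings
     * per tick (resuming from last_monitored_tx_qid) for Tx completions that
     * never arrived, and check the paired Rx ring for missed interrupts,
     * skipping the Rx check for XDP Tx rings.
     */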
3623static void check_for_missing_completions(struct ena_adapter *adapter)
3624{
3625        struct ena_ring *tx_ring;
3626        struct ena_ring *rx_ring;
3627        int i, budget, rc;
3628        int io_queue_count;
3629
3630        io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
3631        /* Make sure a device turn-off done by another process is observed */
3632        smp_rmb();
3633
3634        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3635                return;
3636
3637        if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3638                return;
3639
3640        if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
3641                return;
3642
3643        budget = ENA_MONITORED_TX_QUEUES;
3644
3645        for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
3646                tx_ring = &adapter->tx_ring[i];
3647                rx_ring = &adapter->rx_ring[i];
3648
3649                rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3650                if (unlikely(rc))
3651                        return;
3652
3653                rc = !ENA_IS_XDP_INDEX(adapter, i) ?
3654                        check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
3655                if (unlikely(rc))
3656                        return;
3657
3658                budget--;
3659                if (!budget)
3660                        break;
3661        }
3662
3663        adapter->last_monitored_tx_qid = i % io_queue_count;
3664}
3665
3666/* trigger napi schedule after 2 consecutive detections */
3667#define EMPTY_RX_REFILL 2
3668/* For the rare case where the device runs out of Rx descriptors and the
3669 * napi handler fails to refill new Rx descriptors (due to a lack of memory,
3670 * for example).
3671 * This case will lead to a deadlock:
3672 * the device won't send interrupts since all the new Rx packets will be
3673 * dropped, and the napi handler won't allocate new Rx descriptors, so the
3674 * device won't be able to send new packets to the host.
3675 *
3676 * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
3677 * It is recommended to have at least 512MB (with a minimum of 128MB for a
3678 * constrained environment).
3679 *
3680 * When such a situation is detected - reschedule napi.
3681 */
3682static void check_for_empty_rx_ring(struct ena_adapter *adapter)
3683{
3684        struct ena_ring *rx_ring;
3685        int i, refill_required;
3686
3687        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3688                return;
3689
3690        if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3691                return;
3692
3693        for (i = 0; i < adapter->num_io_queues; i++) {
3694                rx_ring = &adapter->rx_ring[i];
3695
3696                refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
3697                if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3698                        rx_ring->empty_rx_queue++;
3699
3700                        if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3701                                u64_stats_update_begin(&rx_ring->syncp);
3702                                rx_ring->rx_stats.empty_rx_ring++;
3703                                u64_stats_update_end(&rx_ring->syncp);
3704
3705                                netif_err(adapter, drv, adapter->netdev,
3706                                          "trigger refill for ring %d\n", i);
3707
3708                                napi_schedule(rx_ring->napi);
3709                                rx_ring->empty_rx_queue = 0;
3710                        }
3711                } else {
3712                        rx_ring->empty_rx_queue = 0;
3713                }
3714        }
3715}
3716
3717/* Check for keep alive expiration */
3718static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3719{
3720        unsigned long keep_alive_expired;
3721
3722        if (!adapter->wd_state)
3723                return;
3724
3725        if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3726                return;
3727
3728        keep_alive_expired = adapter->last_keep_alive_jiffies +
3729                             adapter->keep_alive_timeout;
3730        if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
3731                netif_err(adapter, drv, adapter->netdev,
3732                          "Keep alive watchdog timeout.\n");
3733                u64_stats_update_begin(&adapter->syncp);
3734                adapter->dev_stats.wd_expired++;
3735                u64_stats_update_end(&adapter->syncp);
3736                adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
3737                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3738        }
3739}
3740
3741static void check_for_admin_com_state(struct ena_adapter *adapter)
3742{
3743        if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
3744                netif_err(adapter, drv, adapter->netdev,
3745                          "ENA admin queue is not in running state!\n");
3746                u64_stats_update_begin(&adapter->syncp);
3747                adapter->dev_stats.admin_q_pause++;
3748                u64_stats_update_end(&adapter->syncp);
3749                adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
3750                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3751        }
3752}
3753
3754static void ena_update_hints(struct ena_adapter *adapter,
3755                             struct ena_admin_ena_hw_hints *hints)
3756{
3757        struct net_device *netdev = adapter->netdev;
3758
3759        if (hints->admin_completion_tx_timeout)
3760                adapter->ena_dev->admin_queue.completion_timeout =
3761                        hints->admin_completion_tx_timeout * 1000;
3762
3763        if (hints->mmio_read_timeout)
3764                /* convert to usec */
3765                adapter->ena_dev->mmio_read.reg_read_to =
3766                        hints->mmio_read_timeout * 1000;
3767
3768        if (hints->missed_tx_completion_count_threshold_to_reset)
3769                adapter->missing_tx_completion_threshold =
3770                        hints->missed_tx_completion_count_threshold_to_reset;
3771
3772        if (hints->missing_tx_completion_timeout) {
3773                if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3774                        adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
3775                else
3776                        adapter->missing_tx_completion_to =
3777                                msecs_to_jiffies(hints->missing_tx_completion_timeout);
3778        }
3779
3780        if (hints->netdev_wd_timeout)
3781                netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
3782
3783        if (hints->driver_watchdog_timeout) {
3784                if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3785                        adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3786                else
3787                        adapter->keep_alive_timeout =
3788                                msecs_to_jiffies(hints->driver_watchdog_timeout);
3789        }
3790}
3791
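    /* Mirror the 64-bit netdev->features bitmap into the two 32-bit words of
     * the host-info structure (low word first).
     */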
3792static void ena_update_host_info(struct ena_admin_host_info *host_info,
3793                                 struct net_device *netdev)
3794{
3795        host_info->supported_network_features[0] =
3796                netdev->features & GENMASK_ULL(31, 0);
3797        host_info->supported_network_features[1] =
3798                (netdev->features & GENMASK_ULL(63, 32)) >> 32;
3799}
3800
3801static void ena_timer_service(struct timer_list *t)
3802{
3803        struct ena_adapter *adapter = from_timer(adapter, t, timer_service);
3804        u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
3805        struct ena_admin_host_info *host_info =
3806                adapter->ena_dev->host_attr.host_info;
3807
3808        check_for_missing_keep_alive(adapter);
3809
3810        check_for_admin_com_state(adapter);
3811
3812        check_for_missing_completions(adapter);
3813
3814        check_for_empty_rx_ring(adapter);
3815
3816        if (debug_area)
3817                ena_dump_stats_to_buf(adapter, debug_area);
3818
3819        if (host_info)
3820                ena_update_host_info(host_info, adapter->netdev);
3821
3822        if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3823                netif_err(adapter, drv, adapter->netdev,
3824                          "Trigger reset is on\n");
3825                ena_dump_stats_to_dmesg(adapter);
3826                queue_work(ena_wq, &adapter->reset_task);
3827                return;
3828        }
3829
3830        /* Reset the timer */
3831        mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3832}
3833
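    /* The usable number of IO queues is the minimum of: the number of online
     * CPUs (capped at ENA_MAX_NUM_IO_QUEUES), the device's Rx queue limit,
     * its Tx SQ/CQ limits (the LLQ limit when LLQ placement is used) and the
     * MSI-X vector count minus the one vector reserved for management.
     */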
3834static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
3835                                     struct ena_com_dev *ena_dev,
3836                                     struct ena_com_dev_get_features_ctx *get_feat_ctx)
3837{
3838        u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
3839
3840        if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3841                struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3842                        &get_feat_ctx->max_queue_ext.max_queue_ext;
3843                io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
3844                                  max_queue_ext->max_rx_cq_num);
3845
3846                io_tx_sq_num = max_queue_ext->max_tx_sq_num;
3847                io_tx_cq_num = max_queue_ext->max_tx_cq_num;
3848        } else {
3849                struct ena_admin_queue_feature_desc *max_queues =
3850                        &get_feat_ctx->max_queues;
3851                io_tx_sq_num = max_queues->max_sq_num;
3852                io_tx_cq_num = max_queues->max_cq_num;
3853                io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
3854        }
3855
3856        /* In case of LLQ use the llq fields for the tx SQ/CQ */
3857        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3858                io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
3859
3860        max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
3861        max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
3862        max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
3863        max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
3864        /* 1 IRQ for mgmnt and 1 IRQ for each IO queue */
3865        max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
3866        if (unlikely(!max_num_io_queues)) {
3867                dev_err(&pdev->dev, "The device doesn't have io queues\n");
3868                return -EFAULT;
3869        }
3870
3871        return max_num_io_queues;
3872}
3873
3874static int ena_set_queues_placement_policy(struct pci_dev *pdev,
3875                                           struct ena_com_dev *ena_dev,
3876                                           struct ena_admin_feature_llq_desc *llq,
3877                                           struct ena_llq_configurations *llq_default_configurations)
3878{
3879        bool has_mem_bar;
3880        int rc;
3881        u32 llq_feature_mask;
3882
3883        llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3884        if (!(ena_dev->supported_features & llq_feature_mask)) {
3885                dev_err(&pdev->dev,
3886                        "LLQ is not supported. Fallback to host mode policy.\n");
3887                ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3888                return 0;
3889        }
3890
3891        has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
3892
3893        rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3894        if (unlikely(rc)) {
3895                dev_err(&pdev->dev,
3896                        "Failed to configure the device mode. Fallback to host mode policy.\n");
3897                ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3898                return 0;
3899        }
3900
3901        /* Nothing to config, exit */
3902        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
3903                return 0;
3904
3905        if (!has_mem_bar) {
3906                dev_err(&pdev->dev,
3907                        "ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
3908                ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3909                return 0;
3910        }
3911
3912        ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3913                                           pci_resource_start(pdev, ENA_MEM_BAR),
3914                                           pci_resource_len(pdev, ENA_MEM_BAR));
3915
3916        if (!ena_dev->mem_bar)
3917                return -EFAULT;
3918
3919        return 0;
3920}
3921
3922static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
3923                                 struct net_device *netdev)
3924{
3925        netdev_features_t dev_features = 0;
3926
3927        /* Set offload features */
3928        if (feat->offload.tx &
3929                ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
3930                dev_features |= NETIF_F_IP_CSUM;
3931
3932        if (feat->offload.tx &
3933                ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
3934                dev_features |= NETIF_F_IPV6_CSUM;
3935
3936        if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
3937                dev_features |= NETIF_F_TSO;
3938
3939        if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
3940                dev_features |= NETIF_F_TSO6;
3941
3942        if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
3943                dev_features |= NETIF_F_TSO_ECN;
3944
3945        if (feat->offload.rx_supported &
3946                ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
3947                dev_features |= NETIF_F_RXCSUM;
3948
3949        if (feat->offload.rx_supported &
3950                ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
3951                dev_features |= NETIF_F_RXCSUM;
3952
3953        netdev->features =
3954                dev_features |
3955                NETIF_F_SG |
3956                NETIF_F_RXHASH |
3957                NETIF_F_HIGHDMA;
3958
3959        netdev->hw_features |= netdev->features;
3960        netdev->vlan_features |= netdev->features;
3961}
3962
3963static void ena_set_conf_feat_params(struct ena_adapter *adapter,
3964                                     struct ena_com_dev_get_features_ctx *feat)
3965{
3966        struct net_device *netdev = adapter->netdev;
3967
3968        /* Copy mac address */
3969        if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
3970                eth_hw_addr_random(netdev);
3971                ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
3972        } else {
3973                ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
3974                ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
3975        }
3976
3977        /* Set offload features */
3978        ena_set_dev_offloads(feat, netdev);
3979
3980        adapter->max_mtu = feat->dev_attr.max_mtu;
3981        netdev->max_mtu = adapter->max_mtu;
3982        netdev->min_mtu = ENA_MIN_MTU;
3983}
3984
3985static int ena_rss_init_default(struct ena_adapter *adapter)
3986{
3987        struct ena_com_dev *ena_dev = adapter->ena_dev;
3988        struct device *dev = &adapter->pdev->dev;
3989        int rc, i;
3990        u32 val;
3991
3992        rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3993        if (unlikely(rc)) {
3994                dev_err(dev, "Cannot init indirect table\n");
3995                goto err_rss_init;
3996        }
3997
3998        for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3999                val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
4000                rc = ena_com_indirect_table_fill_entry(ena_dev, i,
4001                                                       ENA_IO_RXQ_IDX(val));
4002                if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4003                        dev_err(dev, "Cannot fill indirect table\n");
4004                        goto err_fill_indir;
4005                }
4006        }
4007
4008        rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
4009                                        ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
4010        if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4011                dev_err(dev, "Cannot fill hash function\n");
4012                goto err_fill_indir;
4013        }
4014
4015        rc = ena_com_set_default_hash_ctrl(ena_dev);
4016        if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4017                dev_err(dev, "Cannot fill hash control\n");
4018                goto err_fill_indir;
4019        }
4020
4021        return 0;
4022
4023err_fill_indir:
4024        ena_com_rss_destroy(ena_dev);
4025err_rss_init:
4026
4027        return rc;
4028}
4029
4030static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
4031{
4032        int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
4033
4034        pci_release_selected_regions(pdev, release_bars);
4035}
4036
4037static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
4038{
4039        llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
4040        llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
4041        llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
4042        llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
4043        llq_config->llq_ring_entry_size_value = 128;
4044}
4045
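    /* Derive the default and maximum ring sizes from the device capabilities:
     * the maxima are the min of the SQ and CQ depths (and the LLQ depth when
     * in LLQ placement mode), rounded down to a power of two; the defaults
     * start at ENA_DEFAULT_RING_SIZE and are clamped into
     * [ENA_MIN_RING_SIZE, max].
     */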
4046static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
4047{
4048        struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
4049        struct ena_com_dev *ena_dev = ctx->ena_dev;
4050        u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
4051        u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
4052        u32 max_tx_queue_size;
4053        u32 max_rx_queue_size;
4054
4055        if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
4056                struct ena_admin_queue_ext_feature_fields *max_queue_ext =
4057                        &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
4058                max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
4059                                          max_queue_ext->max_rx_sq_depth);
4060                max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
4061
4062                if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4063                        max_tx_queue_size = min_t(u32, max_tx_queue_size,
4064                                                  llq->max_llq_depth);
4065                else
4066                        max_tx_queue_size = min_t(u32, max_tx_queue_size,
4067                                                  max_queue_ext->max_tx_sq_depth);
4068
4069                ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4070                                             max_queue_ext->max_per_packet_tx_descs);
4071                ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4072                                             max_queue_ext->max_per_packet_rx_descs);
4073        } else {
4074                struct ena_admin_queue_feature_desc *max_queues =
4075                        &ctx->get_feat_ctx->max_queues;
4076                max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
4077                                          max_queues->max_sq_depth);
4078                max_tx_queue_size = max_queues->max_cq_depth;
4079
4080                if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4081                        max_tx_queue_size = min_t(u32, max_tx_queue_size,
4082                                                  llq->max_llq_depth);
4083                else
4084                        max_tx_queue_size = min_t(u32, max_tx_queue_size,
4085                                                  max_queues->max_sq_depth);
4086
4087                ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4088                                             max_queues->max_packet_tx_descs);
4089                ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4090                                             max_queues->max_packet_rx_descs);
4091        }
4092
4093        max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
4094        max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
4095
4096        tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
4097                                  max_tx_queue_size);
4098        rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
4099                                  max_rx_queue_size);
4100
4101        tx_queue_size = rounddown_pow_of_two(tx_queue_size);
4102        rx_queue_size = rounddown_pow_of_two(rx_queue_size);
4103
4104        ctx->max_tx_queue_size = max_tx_queue_size;
4105        ctx->max_rx_queue_size = max_rx_queue_size;
4106        ctx->tx_queue_size = tx_queue_size;
4107        ctx->rx_queue_size = rx_queue_size;
4108
4109        return 0;
4110}
4111
4112/* ena_probe - Device Initialization Routine
4113 * @pdev: PCI device information struct
4114 * @ent: entry in ena_pci_tbl
4115 *
4116 * Returns 0 on success, negative on failure
4117 *
4118 * ena_probe initializes an adapter identified by a pci_dev structure.
4119 * The OS initialization, configuration of the adapter private structure,
4120 * and a hardware reset occur.
4121 */
4122static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
4123{
4124        struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
4125        struct ena_com_dev_get_features_ctx get_feat_ctx;
4126        struct ena_llq_configurations llq_config;
4127        struct ena_com_dev *ena_dev = NULL;
4128        struct ena_adapter *adapter;
4129        struct net_device *netdev;
4130        static int adapters_found;
4131        u32 max_num_io_queues;
4132        char *queue_type_str;
4133        bool wd_state;
4134        int bars, rc;
4135
4136        dev_dbg(&pdev->dev, "%s\n", __func__);
4137
4138        rc = pci_enable_device_mem(pdev);
4139        if (rc) {
4140                dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
4141                return rc;
4142        }
4143
4144        pci_set_master(pdev);
4145
4146        ena_dev = vzalloc(sizeof(*ena_dev));
4147        if (!ena_dev) {
4148                rc = -ENOMEM;
4149                goto err_disable_device;
4150        }
4151
4152        bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
4153        rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
4154        if (rc) {
4155                dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
4156                        rc);
4157                goto err_free_ena_dev;
4158        }
4159
4160        ena_dev->reg_bar = devm_ioremap(&pdev->dev,
4161                                        pci_resource_start(pdev, ENA_REG_BAR),
4162                                        pci_resource_len(pdev, ENA_REG_BAR));
4163        if (!ena_dev->reg_bar) {
4164                dev_err(&pdev->dev, "failed to remap regs bar\n");
4165                rc = -EFAULT;
4166                goto err_free_region;
4167        }
4168
4169        ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
4170
4171        ena_dev->dmadev = &pdev->dev;
4172
4173        rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
4174        if (rc) {
4175                dev_err(&pdev->dev, "ena device init failed\n");
4176                if (rc == -ETIME)
4177                        rc = -EPROBE_DEFER;
4178                goto err_free_region;
4179        }
4180
4181        set_default_llq_configurations(&llq_config);
4182
4183        rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
4184                                             &llq_config);
4185        if (rc) {
4186                dev_err(&pdev->dev, "ena device init failed\n");
4187                goto err_device_destroy;
4188        }
4189
4190        calc_queue_ctx.ena_dev = ena_dev;
4191        calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
4192        calc_queue_ctx.pdev = pdev;
4193
4194        /* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
4195         * Updated during device initialization with the real granularity
4196         */
4197        ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
4198        ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
4199        ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
4200        max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
4201        rc = ena_calc_io_queue_size(&calc_queue_ctx);
4202        if (rc || !max_num_io_queues) {
4203                rc = -EFAULT;
4204                goto err_device_destroy;
4205        }
4206
4207        /* dev zeroed in alloc_etherdev_mq */
4208        netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), max_num_io_queues);
4209        if (!netdev) {
4210                dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
4211                rc = -ENOMEM;
4212                goto err_device_destroy;
4213        }
4214
4215        SET_NETDEV_DEV(netdev, &pdev->dev);
4216
4217        adapter = netdev_priv(netdev);
4218        pci_set_drvdata(pdev, adapter);
4219
4220        adapter->ena_dev = ena_dev;
4221        adapter->netdev = netdev;
4222        adapter->pdev = pdev;
4223
4224        ena_set_conf_feat_params(adapter, &get_feat_ctx);
4225
4226        adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
4227        adapter->reset_reason = ENA_REGS_RESET_NORMAL;
4228
4229        adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
4230        adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
4231        adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
4232        adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
4233        adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
4234        adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
4235
4236        adapter->num_io_queues = max_num_io_queues;
4237        adapter->max_num_io_queues = max_num_io_queues;
4238        adapter->last_monitored_tx_qid = 0;
4239
4240        adapter->xdp_first_ring = 0;
4241        adapter->xdp_num_queues = 0;
4242
4243        adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
4244        adapter->wd_state = wd_state;
4245
4246        snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
4247
4248        rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
4249        if (rc) {
4250                dev_err(&pdev->dev,
4251                        "Failed to query interrupt moderation feature\n");
4252                goto err_netdev_destroy;
4253        }
4254        ena_init_io_rings(adapter,
4255                          0,
4256                          adapter->xdp_num_queues +
4257                          adapter->num_io_queues);
4258
4259        netdev->netdev_ops = &ena_netdev_ops;
4260        netdev->watchdog_timeo = TX_TIMEOUT;
4261        ena_set_ethtool_ops(netdev);
4262
4263        netdev->priv_flags |= IFF_UNICAST_FLT;
4264
4265        u64_stats_init(&adapter->syncp);
4266
4267        rc = ena_enable_msix_and_set_admin_interrupts(adapter);
4268        if (rc) {
4269                dev_err(&pdev->dev,
4270                        "Failed to enable and set the admin interrupts\n");
4271                goto err_worker_destroy;
4272        }
4273        rc = ena_rss_init_default(adapter);
4274        if (rc && (rc != -EOPNOTSUPP)) {
4275                dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
4276                goto err_free_msix;
4277        }
4278
4279        ena_config_debug_area(adapter);
4280
4281        memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
4282
4283        netif_carrier_off(netdev);
4284
4285        rc = register_netdev(netdev);
4286        if (rc) {
4287                dev_err(&pdev->dev, "Cannot register net device\n");
4288                goto err_rss;
4289        }
4290
4291        INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
4292
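        /* Initialize the health-check state (keep-alive and missing TX
         * completion thresholds) consumed by the periodic timer service.
         */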
4293        adapter->last_keep_alive_jiffies = jiffies;
4294        adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
4295        adapter->missing_tx_completion_to = TX_TIMEOUT;
4296        adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
4297
4298        ena_update_hints(adapter, &get_feat_ctx.hw_hints);
4299
4300        timer_setup(&adapter->timer_service, ena_timer_service, 0);
4301        mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
4302
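        /* Report whether TX descriptors are placed in host memory (regular
         * queues) or pushed to device memory (low latency queues).
         */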
4303        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
4304                queue_type_str = "Regular";
4305        else
4306                queue_type_str = "Low Latency";
4307
4308        dev_info(&pdev->dev,
4309                 "%s found at mem %lx, mac addr %pM, Placement policy: %s\n",
4310                 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
4311                 netdev->dev_addr, queue_type_str);
4312
4313        set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
4314
4315        adapters_found++;
4316
4317        return 0;
4318
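        /* Error unwind: each label releases only what was set up before the
         * corresponding failure point, in reverse order of initialization.
         */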
4319err_rss:
4320        ena_com_delete_debug_area(ena_dev);
4321        ena_com_rss_destroy(ena_dev);
4322err_free_msix:
4323        ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
4324        /* stop submitting admin commands on a device that was reset */
4325        ena_com_set_admin_running_state(ena_dev, false);
4326        ena_free_mgmnt_irq(adapter);
4327        ena_disable_msix(adapter);
4328err_worker_destroy:
4329        del_timer(&adapter->timer_service);
4330err_netdev_destroy:
4331        free_netdev(netdev);
4332err_device_destroy:
4333        ena_com_delete_host_info(ena_dev);
4334        ena_com_admin_destroy(ena_dev);
4335err_free_region:
4336        ena_release_bars(ena_dev, pdev);
4337err_free_ena_dev:
4338        vfree(ena_dev);
4339err_disable_device:
4340        pci_disable_device(pdev);
4341        return rc;
4342}
4343
4344/*****************************************************************************/
4345
4346/* __ena_shutoff - Helper used in both PCI remove/shutdown routines
4347 * @pdev: PCI device information struct
4348 * @shutdown: true for a shutdown operation, false for a removal
4349 *
4350 * __ena_shutoff is a helper routine that does the real work on the shutdown
4351 * and removal paths; the difference between those paths is whether the
4352 * netdevice is detached or unregistered.
4353 */
4354static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
4355{
4356        struct ena_adapter *adapter = pci_get_drvdata(pdev);
4357        struct ena_com_dev *ena_dev;
4358        struct net_device *netdev;
4359
4360        ena_dev = adapter->ena_dev;
4361        netdev = adapter->netdev;
4362
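        /* Free the aRFS CPU reverse map, if one was set up for the IO queues */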
4363#ifdef CONFIG_RFS_ACCEL
4364        if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
4365                free_irq_cpu_rmap(netdev->rx_cpu_rmap);
4366                netdev->rx_cpu_rmap = NULL;
4367        }
4368#endif /* CONFIG_RFS_ACCEL */
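        /* Stop the periodic health-check timer and any queued reset work
         * before tearing the device down.
         */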
4369        del_timer_sync(&adapter->timer_service);
4370
4371        cancel_work_sync(&adapter->reset_task);
4372
4373        rtnl_lock(); /* lock released inside the below if-else block */
4374        adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
4375        ena_destroy_device(adapter, true);
4376        if (shutdown) {
4377                netif_device_detach(netdev);
4378                dev_close(netdev);
4379                rtnl_unlock();
4380        } else {
4381                rtnl_unlock();
4382                unregister_netdev(netdev);
4383                free_netdev(netdev);
4384        }
4385
4386        ena_com_rss_destroy(ena_dev);
4387
4388        ena_com_delete_debug_area(ena_dev);
4389
4390        ena_com_delete_host_info(ena_dev);
4391
4392        ena_release_bars(ena_dev, pdev);
4393
4394        pci_disable_device(pdev);
4395
4396        vfree(ena_dev);
4397}
4398
4399/* ena_remove - Device Removal Routine
4400 * @pdev: PCI device information struct
4401 *
4402 * ena_remove is called by the PCI subsystem to alert the driver
4403 * that it should release a PCI device.
4404 */
4405
4406static void ena_remove(struct pci_dev *pdev)
4407{
4408        __ena_shutoff(pdev, false);
4409}
4410
4411/* ena_shutdown - Device Shutdown Routine
4412 * @pdev: PCI device information struct
4413 *
4414 * ena_shutdown is called by the PCI subsystem to alert the driver that
4415 * a shutdown/reboot (or kexec) is happening and device must be disabled.
4416 */
4417
4418static void ena_shutdown(struct pci_dev *pdev)
4419{
4420        __ena_shutoff(pdev, true);
4421}
4422
4423#ifdef CONFIG_PM
4424/* ena_suspend - PM suspend callback
4425 * @pdev: PCI device information struct
4426 * @state: power state
4427 */
4428static int ena_suspend(struct pci_dev *pdev,  pm_message_t state)
4429{
4430        struct ena_adapter *adapter = pci_get_drvdata(pdev);
4431
4432        u64_stats_update_begin(&adapter->syncp);
4433        adapter->dev_stats.suspend++;
4434        u64_stats_update_end(&adapter->syncp);
4435
4436        rtnl_lock();
4437        if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
4438                dev_err(&pdev->dev,
4439                        "ignoring device reset request as the device is being suspended\n");
4440                clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
4441        }
4442        ena_destroy_device(adapter, true);
4443        rtnl_unlock();
4444        return 0;
4445}
4446
4447/* ena_resume - PM resume callback
4448 * @pdev: PCI device information struct
4449 *
4450 */
4451static int ena_resume(struct pci_dev *pdev)
4452{
4453        struct ena_adapter *adapter = pci_get_drvdata(pdev);
4454        int rc;
4455
4456        u64_stats_update_begin(&adapter->syncp);
4457        adapter->dev_stats.resume++;
4458        u64_stats_update_end(&adapter->syncp);
4459
4460        rtnl_lock();
4461        rc = ena_restore_device(adapter);
4462        rtnl_unlock();
4463        return rc;
4464}
4465#endif
4466
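/* PCI driver glue; SR-IOV enable/disable requires no device-specific setup,
 * so it is delegated to the generic pci_sriov_configure_simple() helper.
 */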
4467static struct pci_driver ena_pci_driver = {
4468        .name           = DRV_MODULE_NAME,
4469        .id_table       = ena_pci_tbl,
4470        .probe          = ena_probe,
4471        .remove         = ena_remove,
4472        .shutdown       = ena_shutdown,
4473#ifdef CONFIG_PM
4474        .suspend    = ena_suspend,
4475        .resume     = ena_resume,
4476#endif
4477        .sriov_configure = pci_sriov_configure_simple,
4478};
4479
4480static int __init ena_init(void)
4481{
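        /* Deferred work, such as the firmware reset task, runs on this
         * dedicated single-threaded workqueue.
         */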
4482        ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
4483        if (!ena_wq) {
4484                pr_err("Failed to create workqueue\n");
4485                return -ENOMEM;
4486        }
4487
4488        return pci_register_driver(&ena_pci_driver);
4489}
4490
4491static void __exit ena_cleanup(void)
4492{
4493        pci_unregister_driver(&ena_pci_driver);
4494
4495        if (ena_wq) {
4496                destroy_workqueue(ena_wq);
4497                ena_wq = NULL;
4498        }
4499}
4500
4501/******************************************************************************
4502 ******************************** AENQ Handlers *******************************
4503 *****************************************************************************/
4504/* ena_update_on_link_change:
4505 * Notify the network interface about the change in link status
4506 */
4507static void ena_update_on_link_change(void *adapter_data,
4508                                      struct ena_admin_aenq_entry *aenq_e)
4509{
4510        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4511        struct ena_admin_aenq_link_change_desc *aenq_desc =
4512                (struct ena_admin_aenq_link_change_desc *)aenq_e;
4513        int status = aenq_desc->flags &
4514                ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
4515
4516        if (status) {
4517                netdev_dbg(adapter->netdev, "%s\n", __func__);
4518                set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4519                if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
4520                        netif_carrier_on(adapter->netdev);
4521        } else {
4522                clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4523                netif_carrier_off(adapter->netdev);
4524        }
4525}
4526
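/* ena_keep_alive_wd - Keep-alive AENQ handler
 * Refreshes the timestamp checked by the keep-alive watchdog and snapshots
 * the RX/TX drop counters reported by the device.
 */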
4527static void ena_keep_alive_wd(void *adapter_data,
4528                              struct ena_admin_aenq_entry *aenq_e)
4529{
4530        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4531        struct ena_admin_aenq_keep_alive_desc *desc;
4532        u64 rx_drops;
4533        u64 tx_drops;
4534
4535        desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
4536        adapter->last_keep_alive_jiffies = jiffies;
4537
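        /* The device reports each drop counter as two 32-bit halves;
         * combine them into 64-bit values.
         */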
4538        rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
4539        tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
4540
4541        u64_stats_update_begin(&adapter->syncp);
4542        adapter->dev_stats.rx_drops = rx_drops;
4543        adapter->dev_stats.tx_drops = tx_drops;
4544        u64_stats_update_end(&adapter->syncp);
4545}
4546
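/* ena_notification - Generic AENQ notification handler
 * Only ENA_ADMIN_UPDATE_HINTS is currently handled; it applies updated
 * hardware hints provided by the device.
 */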
4547static void ena_notification(void *adapter_data,
4548                             struct ena_admin_aenq_entry *aenq_e)
4549{
4550        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4551        struct ena_admin_ena_hw_hints *hints;
4552
4553        WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
4554             "Invalid group(%x) expected %x\n",
4555             aenq_e->aenq_common_desc.group,
4556             ENA_ADMIN_NOTIFICATION);
4557
4558        switch (aenq_e->aenq_common_desc.syndrom) {
4559        case ENA_ADMIN_UPDATE_HINTS:
4560                hints = (struct ena_admin_ena_hw_hints *)
4561                        (&aenq_e->inline_data_w4);
4562                ena_update_hints(adapter, hints);
4563                break;
4564        default:
4565                netif_err(adapter, drv, adapter->netdev,
4566                          "Invalid aenq notification syndrome %d\n",
4567                          aenq_e->aenq_common_desc.syndrom);
4568        }
4569}
4570
4571/* This handler will be called for an unknown event group or unimplemented handlers */
4572static void unimplemented_aenq_handler(void *data,
4573                                       struct ena_admin_aenq_entry *aenq_e)
4574{
4575        struct ena_adapter *adapter = (struct ena_adapter *)data;
4576
4577        netif_err(adapter, drv, adapter->netdev,
4578                  "Unknown event was received or event with unimplemented handler\n");
4579}
4580
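/* Dispatch table for asynchronous event notifications: each implemented
 * event group maps to its handler; anything else falls through to
 * unimplemented_aenq_handler().
 */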
4581static struct ena_aenq_handlers aenq_handlers = {
4582        .handlers = {
4583                [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4584                [ENA_ADMIN_NOTIFICATION] = ena_notification,
4585                [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4586        },
4587        .unimplemented_handler = unimplemented_aenq_handler
4588};
4589
4590module_init(ena_init);
4591module_exit(ena_cleanup);
4592