linux/drivers/net/ethernet/amazon/ena/ena_netdev.c
<<
>>
Prefs
   1/*
   2 * Copyright 2015 Amazon.com, Inc. or its affiliates.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  34
  35#ifdef CONFIG_RFS_ACCEL
  36#include <linux/cpu_rmap.h>
  37#endif /* CONFIG_RFS_ACCEL */
  38#include <linux/ethtool.h>
  39#include <linux/if_vlan.h>
  40#include <linux/kernel.h>
  41#include <linux/module.h>
  42#include <linux/moduleparam.h>
  43#include <linux/numa.h>
  44#include <linux/pci.h>
  45#include <linux/utsname.h>
  46#include <linux/version.h>
  47#include <linux/vmalloc.h>
  48#include <net/ip.h>
  49
  50#include "ena_netdev.h"
  51#include "ena_pci_id_tbl.h"
  52
  53static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n";
  54
  55MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
  56MODULE_DESCRIPTION(DEVICE_NAME);
  57MODULE_LICENSE("GPL");
  58MODULE_VERSION(DRV_MODULE_VERSION);
  59
  60/* Time in jiffies before concluding the transmitter is hung. */
  61#define TX_TIMEOUT  (5 * HZ)
  62
  63#define ENA_NAPI_BUDGET 64
  64
  65#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
  66                NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
  67static int debug = -1;
  68module_param(debug, int, 0);
  69MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
  70
  71static struct ena_aenq_handlers aenq_handlers;
  72
  73static struct workqueue_struct *ena_wq;
  74
  75MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
  76
  77static int ena_rss_init_default(struct ena_adapter *adapter);
  78
  79static void ena_tx_timeout(struct net_device *dev)
  80{
  81        struct ena_adapter *adapter = netdev_priv(dev);
  82
  83        /* Change the state of the device to trigger reset
  84         * Check that we are not in the middle or a trigger already
  85         */
  86
  87        if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
  88                return;
  89
  90        adapter->reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
  91        u64_stats_update_begin(&adapter->syncp);
  92        adapter->dev_stats.tx_timeout++;
  93        u64_stats_update_end(&adapter->syncp);
  94
  95        netif_err(adapter, tx_err, dev, "Transmit time out\n");
  96}
  97
  98static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
  99{
 100        int i;
 101
 102        for (i = 0; i < adapter->num_queues; i++)
 103                adapter->rx_ring[i].mtu = mtu;
 104}
 105
 106static int ena_change_mtu(struct net_device *dev, int new_mtu)
 107{
 108        struct ena_adapter *adapter = netdev_priv(dev);
 109        int ret;
 110
 111        ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
 112        if (!ret) {
 113                netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
 114                update_rx_ring_mtu(adapter, new_mtu);
 115                dev->mtu = new_mtu;
 116        } else {
 117                netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
 118                          new_mtu);
 119        }
 120
 121        return ret;
 122}
 123
 124static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
 125{
 126#ifdef CONFIG_RFS_ACCEL
 127        u32 i;
 128        int rc;
 129
 130        adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues);
 131        if (!adapter->netdev->rx_cpu_rmap)
 132                return -ENOMEM;
 133        for (i = 0; i < adapter->num_queues; i++) {
 134                int irq_idx = ENA_IO_IRQ_IDX(i);
 135
 136                rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
 137                                      pci_irq_vector(adapter->pdev, irq_idx));
 138                if (rc) {
 139                        free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
 140                        adapter->netdev->rx_cpu_rmap = NULL;
 141                        return rc;
 142                }
 143        }
 144#endif /* CONFIG_RFS_ACCEL */
 145        return 0;
 146}
 147
 148static void ena_init_io_rings_common(struct ena_adapter *adapter,
 149                                     struct ena_ring *ring, u16 qid)
 150{
 151        ring->qid = qid;
 152        ring->pdev = adapter->pdev;
 153        ring->dev = &adapter->pdev->dev;
 154        ring->netdev = adapter->netdev;
 155        ring->napi = &adapter->ena_napi[qid].napi;
 156        ring->adapter = adapter;
 157        ring->ena_dev = adapter->ena_dev;
 158        ring->per_napi_packets = 0;
 159        ring->per_napi_bytes = 0;
 160        ring->cpu = 0;
 161        u64_stats_init(&ring->syncp);
 162}
 163
 164static void ena_init_io_rings(struct ena_adapter *adapter)
 165{
 166        struct ena_com_dev *ena_dev;
 167        struct ena_ring *txr, *rxr;
 168        int i;
 169
 170        ena_dev = adapter->ena_dev;
 171
 172        for (i = 0; i < adapter->num_queues; i++) {
 173                txr = &adapter->tx_ring[i];
 174                rxr = &adapter->rx_ring[i];
 175
 176                /* TX/RX common ring state */
 177                ena_init_io_rings_common(adapter, txr, i);
 178                ena_init_io_rings_common(adapter, rxr, i);
 179
 180                /* TX specific ring state */
 181                txr->ring_size = adapter->tx_ring_size;
 182                txr->tx_max_header_size = ena_dev->tx_max_header_size;
 183                txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
 184                txr->sgl_size = adapter->max_tx_sgl_size;
 185                txr->smoothed_interval =
 186                        ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
 187
 188                /* RX specific ring state */
 189                rxr->ring_size = adapter->rx_ring_size;
 190                rxr->rx_copybreak = adapter->rx_copybreak;
 191                rxr->sgl_size = adapter->max_rx_sgl_size;
 192                rxr->smoothed_interval =
 193                        ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
 194                rxr->empty_rx_queue = 0;
 195        }
 196}
 197
 198/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
 199 * @adapter: network interface device structure
 200 * @qid: queue index
 201 *
 202 * Return 0 on success, negative on failure
 203 */
 204static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
 205{
 206        struct ena_ring *tx_ring = &adapter->tx_ring[qid];
 207        struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
 208        int size, i, node;
 209
 210        if (tx_ring->tx_buffer_info) {
 211                netif_err(adapter, ifup,
 212                          adapter->netdev, "tx_buffer_info info is not NULL");
 213                return -EEXIST;
 214        }
 215
 216        size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
 217        node = cpu_to_node(ena_irq->cpu);
 218
 219        tx_ring->tx_buffer_info = vzalloc_node(size, node);
 220        if (!tx_ring->tx_buffer_info) {
 221                tx_ring->tx_buffer_info = vzalloc(size);
 222                if (!tx_ring->tx_buffer_info)
 223                        return -ENOMEM;
 224        }
 225
 226        size = sizeof(u16) * tx_ring->ring_size;
 227        tx_ring->free_tx_ids = vzalloc_node(size, node);
 228        if (!tx_ring->free_tx_ids) {
 229                tx_ring->free_tx_ids = vzalloc(size);
 230                if (!tx_ring->free_tx_ids) {
 231                        vfree(tx_ring->tx_buffer_info);
 232                        return -ENOMEM;
 233                }
 234        }
 235
 236        /* Req id ring for TX out of order completions */
 237        for (i = 0; i < tx_ring->ring_size; i++)
 238                tx_ring->free_tx_ids[i] = i;
 239
 240        /* Reset tx statistics */
 241        memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
 242
 243        tx_ring->next_to_use = 0;
 244        tx_ring->next_to_clean = 0;
 245        tx_ring->cpu = ena_irq->cpu;
 246        return 0;
 247}
 248
 249/* ena_free_tx_resources - Free I/O Tx Resources per Queue
 250 * @adapter: network interface device structure
 251 * @qid: queue index
 252 *
 253 * Free all transmit software resources
 254 */
 255static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
 256{
 257        struct ena_ring *tx_ring = &adapter->tx_ring[qid];
 258
 259        vfree(tx_ring->tx_buffer_info);
 260        tx_ring->tx_buffer_info = NULL;
 261
 262        vfree(tx_ring->free_tx_ids);
 263        tx_ring->free_tx_ids = NULL;
 264}
 265
 266/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues
 267 * @adapter: private structure
 268 *
 269 * Return 0 on success, negative on failure
 270 */
 271static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
 272{
 273        int i, rc = 0;
 274
 275        for (i = 0; i < adapter->num_queues; i++) {
 276                rc = ena_setup_tx_resources(adapter, i);
 277                if (rc)
 278                        goto err_setup_tx;
 279        }
 280
 281        return 0;
 282
 283err_setup_tx:
 284
 285        netif_err(adapter, ifup, adapter->netdev,
 286                  "Tx queue %d: allocation failed\n", i);
 287
 288        /* rewind the index freeing the rings as we go */
 289        while (i--)
 290                ena_free_tx_resources(adapter, i);
 291        return rc;
 292}
 293
 294/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
 295 * @adapter: board private structure
 296 *
 297 * Free all transmit software resources
 298 */
 299static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
 300{
 301        int i;
 302
 303        for (i = 0; i < adapter->num_queues; i++)
 304                ena_free_tx_resources(adapter, i);
 305}
 306
 307static inline int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
 308{
 309        if (likely(req_id < rx_ring->ring_size))
 310                return 0;
 311
 312        netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
 313                  "Invalid rx req_id: %hu\n", req_id);
 314
 315        u64_stats_update_begin(&rx_ring->syncp);
 316        rx_ring->rx_stats.bad_req_id++;
 317        u64_stats_update_end(&rx_ring->syncp);
 318
 319        /* Trigger device reset */
 320        rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
 321        set_bit(ENA_FLAG_TRIGGER_RESET, &rx_ring->adapter->flags);
 322        return -EFAULT;
 323}
 324
 325/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
 326 * @adapter: network interface device structure
 327 * @qid: queue index
 328 *
 329 * Returns 0 on success, negative on failure
 330 */
 331static int ena_setup_rx_resources(struct ena_adapter *adapter,
 332                                  u32 qid)
 333{
 334        struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 335        struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
 336        int size, node, i;
 337
 338        if (rx_ring->rx_buffer_info) {
 339                netif_err(adapter, ifup, adapter->netdev,
 340                          "rx_buffer_info is not NULL");
 341                return -EEXIST;
 342        }
 343
 344        /* alloc extra element so in rx path
 345         * we can always prefetch rx_info + 1
 346         */
 347        size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
 348        node = cpu_to_node(ena_irq->cpu);
 349
 350        rx_ring->rx_buffer_info = vzalloc_node(size, node);
 351        if (!rx_ring->rx_buffer_info) {
 352                rx_ring->rx_buffer_info = vzalloc(size);
 353                if (!rx_ring->rx_buffer_info)
 354                        return -ENOMEM;
 355        }
 356
 357        size = sizeof(u16) * rx_ring->ring_size;
 358        rx_ring->free_rx_ids = vzalloc_node(size, node);
 359        if (!rx_ring->free_rx_ids) {
 360                rx_ring->free_rx_ids = vzalloc(size);
 361                if (!rx_ring->free_rx_ids) {
 362                        vfree(rx_ring->rx_buffer_info);
 363                        return -ENOMEM;
 364                }
 365        }
 366
 367        /* Req id ring for receiving RX pkts out of order */
 368        for (i = 0; i < rx_ring->ring_size; i++)
 369                rx_ring->free_rx_ids[i] = i;
 370
 371        /* Reset rx statistics */
 372        memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
 373
 374        rx_ring->next_to_clean = 0;
 375        rx_ring->next_to_use = 0;
 376        rx_ring->cpu = ena_irq->cpu;
 377
 378        return 0;
 379}
 380
 381/* ena_free_rx_resources - Free I/O Rx Resources
 382 * @adapter: network interface device structure
 383 * @qid: queue index
 384 *
 385 * Free all receive software resources
 386 */
 387static void ena_free_rx_resources(struct ena_adapter *adapter,
 388                                  u32 qid)
 389{
 390        struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 391
 392        vfree(rx_ring->rx_buffer_info);
 393        rx_ring->rx_buffer_info = NULL;
 394
 395        vfree(rx_ring->free_rx_ids);
 396        rx_ring->free_rx_ids = NULL;
 397}
 398
 399/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
 400 * @adapter: board private structure
 401 *
 402 * Return 0 on success, negative on failure
 403 */
 404static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
 405{
 406        int i, rc = 0;
 407
 408        for (i = 0; i < adapter->num_queues; i++) {
 409                rc = ena_setup_rx_resources(adapter, i);
 410                if (rc)
 411                        goto err_setup_rx;
 412        }
 413
 414        return 0;
 415
 416err_setup_rx:
 417
 418        netif_err(adapter, ifup, adapter->netdev,
 419                  "Rx queue %d: allocation failed\n", i);
 420
 421        /* rewind the index freeing the rings as we go */
 422        while (i--)
 423                ena_free_rx_resources(adapter, i);
 424        return rc;
 425}
 426
 427/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
 428 * @adapter: board private structure
 429 *
 430 * Free all receive software resources
 431 */
 432static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
 433{
 434        int i;
 435
 436        for (i = 0; i < adapter->num_queues; i++)
 437                ena_free_rx_resources(adapter, i);
 438}
 439
 440static inline int ena_alloc_rx_page(struct ena_ring *rx_ring,
 441                                    struct ena_rx_buffer *rx_info, gfp_t gfp)
 442{
 443        struct ena_com_buf *ena_buf;
 444        struct page *page;
 445        dma_addr_t dma;
 446
 447        /* if previous allocated page is not used */
 448        if (unlikely(rx_info->page))
 449                return 0;
 450
 451        page = alloc_page(gfp);
 452        if (unlikely(!page)) {
 453                u64_stats_update_begin(&rx_ring->syncp);
 454                rx_ring->rx_stats.page_alloc_fail++;
 455                u64_stats_update_end(&rx_ring->syncp);
 456                return -ENOMEM;
 457        }
 458
 459        dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE,
 460                           DMA_FROM_DEVICE);
 461        if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
 462                u64_stats_update_begin(&rx_ring->syncp);
 463                rx_ring->rx_stats.dma_mapping_err++;
 464                u64_stats_update_end(&rx_ring->syncp);
 465
 466                __free_page(page);
 467                return -EIO;
 468        }
 469        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 470                  "alloc page %p, rx_info %p\n", page, rx_info);
 471
 472        rx_info->page = page;
 473        rx_info->page_offset = 0;
 474        ena_buf = &rx_info->ena_buf;
 475        ena_buf->paddr = dma;
 476        ena_buf->len = PAGE_SIZE;
 477
 478        return 0;
 479}
 480
 481static void ena_free_rx_page(struct ena_ring *rx_ring,
 482                             struct ena_rx_buffer *rx_info)
 483{
 484        struct page *page = rx_info->page;
 485        struct ena_com_buf *ena_buf = &rx_info->ena_buf;
 486
 487        if (unlikely(!page)) {
 488                netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
 489                           "Trying to free unallocated buffer\n");
 490                return;
 491        }
 492
 493        dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE,
 494                       DMA_FROM_DEVICE);
 495
 496        __free_page(page);
 497        rx_info->page = NULL;
 498}
 499
 500static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
 501{
 502        u16 next_to_use, req_id;
 503        u32 i;
 504        int rc;
 505
 506        next_to_use = rx_ring->next_to_use;
 507
 508        for (i = 0; i < num; i++) {
 509                struct ena_rx_buffer *rx_info;
 510
 511                req_id = rx_ring->free_rx_ids[next_to_use];
 512                rc = validate_rx_req_id(rx_ring, req_id);
 513                if (unlikely(rc < 0))
 514                        break;
 515
 516                rx_info = &rx_ring->rx_buffer_info[req_id];
 517
 518
 519                rc = ena_alloc_rx_page(rx_ring, rx_info,
 520                                       __GFP_COLD | GFP_ATOMIC | __GFP_COMP);
 521                if (unlikely(rc < 0)) {
 522                        netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
 523                                   "failed to alloc buffer for rx queue %d\n",
 524                                   rx_ring->qid);
 525                        break;
 526                }
 527                rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
 528                                                &rx_info->ena_buf,
 529                                                req_id);
 530                if (unlikely(rc)) {
 531                        netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
 532                                   "failed to add buffer for rx queue %d\n",
 533                                   rx_ring->qid);
 534                        break;
 535                }
 536                next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
 537                                                   rx_ring->ring_size);
 538        }
 539
 540        if (unlikely(i < num)) {
 541                u64_stats_update_begin(&rx_ring->syncp);
 542                rx_ring->rx_stats.refil_partial++;
 543                u64_stats_update_end(&rx_ring->syncp);
 544                netdev_warn(rx_ring->netdev,
 545                            "refilled rx qid %d with only %d buffers (from %d)\n",
 546                            rx_ring->qid, i, num);
 547        }
 548
 549        if (likely(i)) {
 550                /* Add memory barrier to make sure the desc were written before
 551                 * issue a doorbell
 552                 */
 553                wmb();
 554                ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
 555        }
 556
 557        rx_ring->next_to_use = next_to_use;
 558
 559        return i;
 560}
 561
 562static void ena_free_rx_bufs(struct ena_adapter *adapter,
 563                             u32 qid)
 564{
 565        struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 566        u32 i;
 567
 568        for (i = 0; i < rx_ring->ring_size; i++) {
 569                struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
 570
 571                if (rx_info->page)
 572                        ena_free_rx_page(rx_ring, rx_info);
 573        }
 574}
 575
 576/* ena_refill_all_rx_bufs - allocate all queues Rx buffers
 577 * @adapter: board private structure
 578 *
 579 */
 580static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
 581{
 582        struct ena_ring *rx_ring;
 583        int i, rc, bufs_num;
 584
 585        for (i = 0; i < adapter->num_queues; i++) {
 586                rx_ring = &adapter->rx_ring[i];
 587                bufs_num = rx_ring->ring_size - 1;
 588                rc = ena_refill_rx_bufs(rx_ring, bufs_num);
 589
 590                if (unlikely(rc != bufs_num))
 591                        netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
 592                                   "refilling Queue %d failed. allocated %d buffers from: %d\n",
 593                                   i, rc, bufs_num);
 594        }
 595}
 596
 597static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
 598{
 599        int i;
 600
 601        for (i = 0; i < adapter->num_queues; i++)
 602                ena_free_rx_bufs(adapter, i);
 603}
 604
 605/* ena_free_tx_bufs - Free Tx Buffers per Queue
 606 * @tx_ring: TX ring for which buffers be freed
 607 */
 608static void ena_free_tx_bufs(struct ena_ring *tx_ring)
 609{
 610        bool print_once = true;
 611        u32 i;
 612
 613        for (i = 0; i < tx_ring->ring_size; i++) {
 614                struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
 615                struct ena_com_buf *ena_buf;
 616                int nr_frags;
 617                int j;
 618
 619                if (!tx_info->skb)
 620                        continue;
 621
 622                if (print_once) {
 623                        netdev_notice(tx_ring->netdev,
 624                                      "free uncompleted tx skb qid %d idx 0x%x\n",
 625                                      tx_ring->qid, i);
 626                        print_once = false;
 627                } else {
 628                        netdev_dbg(tx_ring->netdev,
 629                                   "free uncompleted tx skb qid %d idx 0x%x\n",
 630                                   tx_ring->qid, i);
 631                }
 632
 633                ena_buf = tx_info->bufs;
 634                dma_unmap_single(tx_ring->dev,
 635                                 ena_buf->paddr,
 636                                 ena_buf->len,
 637                                 DMA_TO_DEVICE);
 638
 639                /* unmap remaining mapped pages */
 640                nr_frags = tx_info->num_of_bufs - 1;
 641                for (j = 0; j < nr_frags; j++) {
 642                        ena_buf++;
 643                        dma_unmap_page(tx_ring->dev,
 644                                       ena_buf->paddr,
 645                                       ena_buf->len,
 646                                       DMA_TO_DEVICE);
 647                }
 648
 649                dev_kfree_skb_any(tx_info->skb);
 650        }
 651        netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
 652                                                  tx_ring->qid));
 653}
 654
 655static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
 656{
 657        struct ena_ring *tx_ring;
 658        int i;
 659
 660        for (i = 0; i < adapter->num_queues; i++) {
 661                tx_ring = &adapter->tx_ring[i];
 662                ena_free_tx_bufs(tx_ring);
 663        }
 664}
 665
 666static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
 667{
 668        u16 ena_qid;
 669        int i;
 670
 671        for (i = 0; i < adapter->num_queues; i++) {
 672                ena_qid = ENA_IO_TXQ_IDX(i);
 673                ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
 674        }
 675}
 676
 677static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
 678{
 679        u16 ena_qid;
 680        int i;
 681
 682        for (i = 0; i < adapter->num_queues; i++) {
 683                ena_qid = ENA_IO_RXQ_IDX(i);
 684                ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
 685        }
 686}
 687
 688static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
 689{
 690        ena_destroy_all_tx_queues(adapter);
 691        ena_destroy_all_rx_queues(adapter);
 692}
 693
 694static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
 695{
 696        struct ena_tx_buffer *tx_info = NULL;
 697
 698        if (likely(req_id < tx_ring->ring_size)) {
 699                tx_info = &tx_ring->tx_buffer_info[req_id];
 700                if (likely(tx_info->skb))
 701                        return 0;
 702        }
 703
 704        if (tx_info)
 705                netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
 706                          "tx_info doesn't have valid skb\n");
 707        else
 708                netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
 709                          "Invalid req_id: %hu\n", req_id);
 710
 711        u64_stats_update_begin(&tx_ring->syncp);
 712        tx_ring->tx_stats.bad_req_id++;
 713        u64_stats_update_end(&tx_ring->syncp);
 714
 715        /* Trigger device reset */
 716        tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
 717        set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags);
 718        return -EFAULT;
 719}
 720
 721static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
 722{
 723        struct netdev_queue *txq;
 724        bool above_thresh;
 725        u32 tx_bytes = 0;
 726        u32 total_done = 0;
 727        u16 next_to_clean;
 728        u16 req_id;
 729        int tx_pkts = 0;
 730        int rc;
 731
 732        next_to_clean = tx_ring->next_to_clean;
 733        txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
 734
 735        while (tx_pkts < budget) {
 736                struct ena_tx_buffer *tx_info;
 737                struct sk_buff *skb;
 738                struct ena_com_buf *ena_buf;
 739                int i, nr_frags;
 740
 741                rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
 742                                                &req_id);
 743                if (rc)
 744                        break;
 745
 746                rc = validate_tx_req_id(tx_ring, req_id);
 747                if (rc)
 748                        break;
 749
 750                tx_info = &tx_ring->tx_buffer_info[req_id];
 751                skb = tx_info->skb;
 752
 753                /* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */
 754                prefetch(&skb->end);
 755
 756                tx_info->skb = NULL;
 757                tx_info->last_jiffies = 0;
 758
 759                if (likely(tx_info->num_of_bufs != 0)) {
 760                        ena_buf = tx_info->bufs;
 761
 762                        dma_unmap_single(tx_ring->dev,
 763                                         dma_unmap_addr(ena_buf, paddr),
 764                                         dma_unmap_len(ena_buf, len),
 765                                         DMA_TO_DEVICE);
 766
 767                        /* unmap remaining mapped pages */
 768                        nr_frags = tx_info->num_of_bufs - 1;
 769                        for (i = 0; i < nr_frags; i++) {
 770                                ena_buf++;
 771                                dma_unmap_page(tx_ring->dev,
 772                                               dma_unmap_addr(ena_buf, paddr),
 773                                               dma_unmap_len(ena_buf, len),
 774                                               DMA_TO_DEVICE);
 775                        }
 776                }
 777
 778                netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
 779                          "tx_poll: q %d skb %p completed\n", tx_ring->qid,
 780                          skb);
 781
 782                tx_bytes += skb->len;
 783                dev_kfree_skb(skb);
 784                tx_pkts++;
 785                total_done += tx_info->tx_descs;
 786
 787                tx_ring->free_tx_ids[next_to_clean] = req_id;
 788                next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
 789                                                     tx_ring->ring_size);
 790        }
 791
 792        tx_ring->next_to_clean = next_to_clean;
 793        ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
 794        ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
 795
 796        netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
 797
 798        netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
 799                  "tx_poll: q %d done. total pkts: %d\n",
 800                  tx_ring->qid, tx_pkts);
 801
 802        /* need to make the rings circular update visible to
 803         * ena_start_xmit() before checking for netif_queue_stopped().
 804         */
 805        smp_mb();
 806
 807        above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
 808                ENA_TX_WAKEUP_THRESH;
 809        if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
 810                __netif_tx_lock(txq, smp_processor_id());
 811                above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
 812                        ENA_TX_WAKEUP_THRESH;
 813                if (netif_tx_queue_stopped(txq) && above_thresh) {
 814                        netif_tx_wake_queue(txq);
 815                        u64_stats_update_begin(&tx_ring->syncp);
 816                        tx_ring->tx_stats.queue_wakeup++;
 817                        u64_stats_update_end(&tx_ring->syncp);
 818                }
 819                __netif_tx_unlock(txq);
 820        }
 821
 822        tx_ring->per_napi_bytes += tx_bytes;
 823        tx_ring->per_napi_packets += tx_pkts;
 824
 825        return tx_pkts;
 826}
 827
 828static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags)
 829{
 830        struct sk_buff *skb;
 831
 832        if (frags)
 833                skb = napi_get_frags(rx_ring->napi);
 834        else
 835                skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
 836                                                rx_ring->rx_copybreak);
 837
 838        if (unlikely(!skb)) {
 839                u64_stats_update_begin(&rx_ring->syncp);
 840                rx_ring->rx_stats.skb_alloc_fail++;
 841                u64_stats_update_end(&rx_ring->syncp);
 842                netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
 843                          "Failed to allocate skb. frags: %d\n", frags);
 844                return NULL;
 845        }
 846
 847        return skb;
 848}
 849
 850static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 851                                  struct ena_com_rx_buf_info *ena_bufs,
 852                                  u32 descs,
 853                                  u16 *next_to_clean)
 854{
 855        struct sk_buff *skb;
 856        struct ena_rx_buffer *rx_info;
 857        u16 len, req_id, buf = 0;
 858        void *va;
 859
 860        len = ena_bufs[buf].len;
 861        req_id = ena_bufs[buf].req_id;
 862        rx_info = &rx_ring->rx_buffer_info[req_id];
 863
 864        if (unlikely(!rx_info->page)) {
 865                netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
 866                          "Page is NULL\n");
 867                return NULL;
 868        }
 869
 870        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 871                  "rx_info %p page %p\n",
 872                  rx_info, rx_info->page);
 873
 874        /* save virt address of first buffer */
 875        va = page_address(rx_info->page) + rx_info->page_offset;
 876        prefetch(va + NET_IP_ALIGN);
 877
 878        if (len <= rx_ring->rx_copybreak) {
 879                skb = ena_alloc_skb(rx_ring, false);
 880                if (unlikely(!skb))
 881                        return NULL;
 882
 883                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 884                          "rx allocated small packet. len %d. data_len %d\n",
 885                          skb->len, skb->data_len);
 886
 887                /* sync this buffer for CPU use */
 888                dma_sync_single_for_cpu(rx_ring->dev,
 889                                        dma_unmap_addr(&rx_info->ena_buf, paddr),
 890                                        len,
 891                                        DMA_FROM_DEVICE);
 892                skb_copy_to_linear_data(skb, va, len);
 893                dma_sync_single_for_device(rx_ring->dev,
 894                                           dma_unmap_addr(&rx_info->ena_buf, paddr),
 895                                           len,
 896                                           DMA_FROM_DEVICE);
 897
 898                skb_put(skb, len);
 899                skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 900                rx_ring->free_rx_ids[*next_to_clean] = req_id;
 901                *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
 902                                                     rx_ring->ring_size);
 903                return skb;
 904        }
 905
 906        skb = ena_alloc_skb(rx_ring, true);
 907        if (unlikely(!skb))
 908                return NULL;
 909
 910        do {
 911                dma_unmap_page(rx_ring->dev,
 912                               dma_unmap_addr(&rx_info->ena_buf, paddr),
 913                               PAGE_SIZE, DMA_FROM_DEVICE);
 914
 915                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
 916                                rx_info->page_offset, len, PAGE_SIZE);
 917
 918                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 919                          "rx skb updated. len %d. data_len %d\n",
 920                          skb->len, skb->data_len);
 921
 922                rx_info->page = NULL;
 923
 924                rx_ring->free_rx_ids[*next_to_clean] = req_id;
 925                *next_to_clean =
 926                        ENA_RX_RING_IDX_NEXT(*next_to_clean,
 927                                             rx_ring->ring_size);
 928                if (likely(--descs == 0))
 929                        break;
 930
 931                buf++;
 932                len = ena_bufs[buf].len;
 933                req_id = ena_bufs[buf].req_id;
 934                rx_info = &rx_ring->rx_buffer_info[req_id];
 935        } while (1);
 936
 937        return skb;
 938}
 939
 940/* ena_rx_checksum - indicate in skb if hw indicated a good cksum
 941 * @adapter: structure containing adapter specific data
 942 * @ena_rx_ctx: received packet context/metadata
 943 * @skb: skb currently being received and modified
 944 */
 945static inline void ena_rx_checksum(struct ena_ring *rx_ring,
 946                                   struct ena_com_rx_ctx *ena_rx_ctx,
 947                                   struct sk_buff *skb)
 948{
 949        /* Rx csum disabled */
 950        if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
 951                skb->ip_summed = CHECKSUM_NONE;
 952                return;
 953        }
 954
 955        /* For fragmented packets the checksum isn't valid */
 956        if (ena_rx_ctx->frag) {
 957                skb->ip_summed = CHECKSUM_NONE;
 958                return;
 959        }
 960
 961        /* if IP and error */
 962        if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
 963                     (ena_rx_ctx->l3_csum_err))) {
 964                /* ipv4 checksum error */
 965                skb->ip_summed = CHECKSUM_NONE;
 966                u64_stats_update_begin(&rx_ring->syncp);
 967                rx_ring->rx_stats.bad_csum++;
 968                u64_stats_update_end(&rx_ring->syncp);
 969                netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
 970                          "RX IPv4 header checksum error\n");
 971                return;
 972        }
 973
 974        /* if TCP/UDP */
 975        if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
 976                   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
 977                if (unlikely(ena_rx_ctx->l4_csum_err)) {
 978                        /* TCP/UDP checksum error */
 979                        u64_stats_update_begin(&rx_ring->syncp);
 980                        rx_ring->rx_stats.bad_csum++;
 981                        u64_stats_update_end(&rx_ring->syncp);
 982                        netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
 983                                  "RX L4 checksum error\n");
 984                        skb->ip_summed = CHECKSUM_NONE;
 985                        return;
 986                }
 987
 988                skb->ip_summed = CHECKSUM_UNNECESSARY;
 989        }
 990}
 991
 992static void ena_set_rx_hash(struct ena_ring *rx_ring,
 993                            struct ena_com_rx_ctx *ena_rx_ctx,
 994                            struct sk_buff *skb)
 995{
 996        enum pkt_hash_types hash_type;
 997
 998        if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
 999                if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1000                           (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
1001
1002                        hash_type = PKT_HASH_TYPE_L4;
1003                else
1004                        hash_type = PKT_HASH_TYPE_NONE;
1005
1006                /* Override hash type if the packet is fragmented */
1007                if (ena_rx_ctx->frag)
1008                        hash_type = PKT_HASH_TYPE_NONE;
1009
1010                skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
1011        }
1012}
1013
1014/* ena_clean_rx_irq - Cleanup RX irq
1015 * @rx_ring: RX ring to clean
1016 * @napi: napi handler
1017 * @budget: how many packets driver is allowed to clean
1018 *
1019 * Returns the number of cleaned buffers.
1020 */
1021static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
1022                            u32 budget)
1023{
1024        u16 next_to_clean = rx_ring->next_to_clean;
1025        u32 res_budget, work_done;
1026
1027        struct ena_com_rx_ctx ena_rx_ctx;
1028        struct ena_adapter *adapter;
1029        struct sk_buff *skb;
1030        int refill_required;
1031        int refill_threshold;
1032        int rc = 0;
1033        int total_len = 0;
1034        int rx_copybreak_pkt = 0;
1035        int i;
1036
1037        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1038                  "%s qid %d\n", __func__, rx_ring->qid);
1039        res_budget = budget;
1040
1041        do {
1042                ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1043                ena_rx_ctx.max_bufs = rx_ring->sgl_size;
1044                ena_rx_ctx.descs = 0;
1045                rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
1046                                    rx_ring->ena_com_io_sq,
1047                                    &ena_rx_ctx);
1048                if (unlikely(rc))
1049                        goto error;
1050
1051                if (unlikely(ena_rx_ctx.descs == 0))
1052                        break;
1053
1054                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1055                          "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1056                          rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1057                          ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1058
1059                /* allocate skb and fill it */
1060                skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs,
1061                                 &next_to_clean);
1062
1063                /* exit if we failed to retrieve a buffer */
1064                if (unlikely(!skb)) {
1065                        for (i = 0; i < ena_rx_ctx.descs; i++) {
1066                                rx_ring->free_tx_ids[next_to_clean] =
1067                                        rx_ring->ena_bufs[i].req_id;
1068                                next_to_clean =
1069                                        ENA_RX_RING_IDX_NEXT(next_to_clean,
1070                                                             rx_ring->ring_size);
1071                        }
1072                        break;
1073                }
1074
1075                ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
1076
1077                ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
1078
1079                skb_record_rx_queue(skb, rx_ring->qid);
1080
1081                if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) {
1082                        total_len += rx_ring->ena_bufs[0].len;
1083                        rx_copybreak_pkt++;
1084                        napi_gro_receive(napi, skb);
1085                } else {
1086                        total_len += skb->len;
1087                        napi_gro_frags(napi);
1088                }
1089
1090                res_budget--;
1091        } while (likely(res_budget));
1092
1093        work_done = budget - res_budget;
1094        rx_ring->per_napi_bytes += total_len;
1095        rx_ring->per_napi_packets += work_done;
1096        u64_stats_update_begin(&rx_ring->syncp);
1097        rx_ring->rx_stats.bytes += total_len;
1098        rx_ring->rx_stats.cnt += work_done;
1099        rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
1100        u64_stats_update_end(&rx_ring->syncp);
1101
1102        rx_ring->next_to_clean = next_to_clean;
1103
1104        refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
1105        refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
1106
1107        /* Optimization, try to batch new rx buffers */
1108        if (refill_required > refill_threshold) {
1109                ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1110                ena_refill_rx_bufs(rx_ring, refill_required);
1111        }
1112
1113        return work_done;
1114
1115error:
1116        adapter = netdev_priv(rx_ring->netdev);
1117
1118        u64_stats_update_begin(&rx_ring->syncp);
1119        rx_ring->rx_stats.bad_desc_num++;
1120        u64_stats_update_end(&rx_ring->syncp);
1121
1122        /* Too many desc from the device. Trigger reset */
1123        adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
1124        set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
1125
1126        return 0;
1127}
1128
1129inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
1130                                       struct ena_ring *tx_ring)
1131{
1132        /* We apply adaptive moderation on Rx path only.
1133         * Tx uses static interrupt moderation.
1134         */
1135        ena_com_calculate_interrupt_delay(rx_ring->ena_dev,
1136                                          rx_ring->per_napi_packets,
1137                                          rx_ring->per_napi_bytes,
1138                                          &rx_ring->smoothed_interval,
1139                                          &rx_ring->moder_tbl_idx);
1140
1141        /* Reset per napi packets/bytes */
1142        tx_ring->per_napi_packets = 0;
1143        tx_ring->per_napi_bytes = 0;
1144        rx_ring->per_napi_packets = 0;
1145        rx_ring->per_napi_bytes = 0;
1146}
1147
1148static inline void ena_unmask_interrupt(struct ena_ring *tx_ring,
1149                                        struct ena_ring *rx_ring)
1150{
1151        struct ena_eth_io_intr_reg intr_reg;
1152
1153        /* Update intr register: rx intr delay,
1154         * tx intr delay and interrupt unmask
1155         */
1156        ena_com_update_intr_reg(&intr_reg,
1157                                rx_ring->smoothed_interval,
1158                                tx_ring->smoothed_interval,
1159                                true);
1160
1161        /* It is a shared MSI-X.
1162         * Tx and Rx CQ have pointer to it.
1163         * So we use one of them to reach the intr reg
1164         */
1165        ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
1166}
1167
1168static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1169                                             struct ena_ring *rx_ring)
1170{
1171        int cpu = get_cpu();
1172        int numa_node;
1173
1174        /* Check only one ring since the 2 rings are running on the same cpu */
1175        if (likely(tx_ring->cpu == cpu))
1176                goto out;
1177
1178        numa_node = cpu_to_node(cpu);
1179        put_cpu();
1180
1181        if (numa_node != NUMA_NO_NODE) {
1182                ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1183                ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node);
1184        }
1185
1186        tx_ring->cpu = cpu;
1187        rx_ring->cpu = cpu;
1188
1189        return;
1190out:
1191        put_cpu();
1192}
1193
1194static int ena_io_poll(struct napi_struct *napi, int budget)
1195{
1196        struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
1197        struct ena_ring *tx_ring, *rx_ring;
1198
1199        u32 tx_work_done;
1200        u32 rx_work_done;
1201        int tx_budget;
1202        int napi_comp_call = 0;
1203        int ret;
1204
1205        tx_ring = ena_napi->tx_ring;
1206        rx_ring = ena_napi->rx_ring;
1207
1208        tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
1209
1210        if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1211            test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
1212                napi_complete_done(napi, 0);
1213                return 0;
1214        }
1215
1216        tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
1217        rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
1218
1219        /* If the device is about to reset or down, avoid unmask
1220         * the interrupt and return 0 so NAPI won't reschedule
1221         */
1222        if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1223                     test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
1224                napi_complete_done(napi, 0);
1225                ret = 0;
1226
1227        } else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
1228                napi_comp_call = 1;
1229
1230                /* Update numa and unmask the interrupt only when schedule
1231                 * from the interrupt context (vs from sk_busy_loop)
1232                 */
1233                if (napi_complete_done(napi, rx_work_done)) {
1234                        /* Tx and Rx share the same interrupt vector */
1235                        if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
1236                                ena_adjust_intr_moderation(rx_ring, tx_ring);
1237
1238                        ena_unmask_interrupt(tx_ring, rx_ring);
1239                }
1240
1241                ena_update_ring_numa_node(tx_ring, rx_ring);
1242
1243                ret = rx_work_done;
1244        } else {
1245                ret = budget;
1246        }
1247
1248        u64_stats_update_begin(&tx_ring->syncp);
1249        tx_ring->tx_stats.napi_comp += napi_comp_call;
1250        tx_ring->tx_stats.tx_poll++;
1251        u64_stats_update_end(&tx_ring->syncp);
1252
1253        return ret;
1254}
1255
1256static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
1257{
1258        struct ena_adapter *adapter = (struct ena_adapter *)data;
1259
1260        ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1261
1262        /* Don't call the aenq handler before probe is done */
1263        if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
1264                ena_com_aenq_intr_handler(adapter->ena_dev, data);
1265
1266        return IRQ_HANDLED;
1267}
1268
1269/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
1270 * @irq: interrupt number
1271 * @data: pointer to a network interface private napi device structure
1272 */
1273static irqreturn_t ena_intr_msix_io(int irq, void *data)
1274{
1275        struct ena_napi *ena_napi = data;
1276
1277        napi_schedule_irqoff(&ena_napi->napi);
1278
1279        return IRQ_HANDLED;
1280}
1281
1282/* Reserve a single MSI-X vector for management (admin + aenq).
1283 * plus reserve one vector for each potential io queue.
1284 * the number of potential io queues is the minimum of what the device
1285 * supports and the number of vCPUs.
1286 */
1287static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
1288{
1289        int msix_vecs, irq_cnt;
1290
1291        if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1292                netif_err(adapter, probe, adapter->netdev,
1293                          "Error, MSI-X is already enabled\n");
1294                return -EPERM;
1295        }
1296
1297        /* Reserved the max msix vectors we might need */
1298        msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
1299
1300        netif_dbg(adapter, probe, adapter->netdev,
1301                  "trying to enable MSI-X, vectors %d\n", msix_vecs);
1302
1303        irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
1304                                        msix_vecs, PCI_IRQ_MSIX);
1305
1306        if (irq_cnt < 0) {
1307                netif_err(adapter, probe, adapter->netdev,
1308                          "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
1309                return -ENOSPC;
1310        }
1311
1312        if (irq_cnt != msix_vecs) {
1313                netif_notice(adapter, probe, adapter->netdev,
1314                             "enable only %d MSI-X (out of %d), reduce the number of queues\n",
1315                             irq_cnt, msix_vecs);
1316                adapter->num_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
1317        }
1318
1319        if (ena_init_rx_cpu_rmap(adapter))
1320                netif_warn(adapter, probe, adapter->netdev,
1321                           "Failed to map IRQs to CPUs\n");
1322
1323        adapter->msix_vecs = irq_cnt;
1324        set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
1325
1326        return 0;
1327}
1328
1329static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1330{
1331        u32 cpu;
1332
1333        snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1334                 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1335                 pci_name(adapter->pdev));
1336        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
1337                ena_intr_msix_mgmnt;
1338        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1339        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1340                pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
1341        cpu = cpumask_first(cpu_online_mask);
1342        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
1343        cpumask_set_cpu(cpu,
1344                        &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
1345}
1346
1347static void ena_setup_io_intr(struct ena_adapter *adapter)
1348{
1349        struct net_device *netdev;
1350        int irq_idx, i, cpu;
1351
1352        netdev = adapter->netdev;
1353
1354        for (i = 0; i < adapter->num_queues; i++) {
1355                irq_idx = ENA_IO_IRQ_IDX(i);
1356                cpu = i % num_online_cpus();
1357
1358                snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1359                         "%s-Tx-Rx-%d", netdev->name, i);
1360                adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
1361                adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
1362                adapter->irq_tbl[irq_idx].vector =
1363                        pci_irq_vector(adapter->pdev, irq_idx);
1364                adapter->irq_tbl[irq_idx].cpu = cpu;
1365
1366                cpumask_set_cpu(cpu,
1367                                &adapter->irq_tbl[irq_idx].affinity_hint_mask);
1368        }
1369}
1370
1371static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
1372{
1373        unsigned long flags = 0;
1374        struct ena_irq *irq;
1375        int rc;
1376
1377        irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1378        rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1379                         irq->data);
1380        if (rc) {
1381                netif_err(adapter, probe, adapter->netdev,
1382                          "failed to request admin irq\n");
1383                return rc;
1384        }
1385
1386        netif_dbg(adapter, probe, adapter->netdev,
1387                  "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n",
1388                  irq->affinity_hint_mask.bits[0], irq->vector);
1389
1390        irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1391
1392        return rc;
1393}
1394
1395static int ena_request_io_irq(struct ena_adapter *adapter)
1396{
1397        unsigned long flags = 0;
1398        struct ena_irq *irq;
1399        int rc = 0, i, k;
1400
1401        if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1402                netif_err(adapter, ifup, adapter->netdev,
1403                          "Failed to request I/O IRQ: MSI-X is not enabled\n");
1404                return -EINVAL;
1405        }
1406
1407        for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1408                irq = &adapter->irq_tbl[i];
1409                rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1410                                 irq->data);
1411                if (rc) {
1412                        netif_err(adapter, ifup, adapter->netdev,
1413                                  "Failed to request I/O IRQ. index %d rc %d\n",
1414                                   i, rc);
1415                        goto err;
1416                }
1417
1418                netif_dbg(adapter, ifup, adapter->netdev,
1419                          "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
1420                          i, irq->affinity_hint_mask.bits[0], irq->vector);
1421
1422                irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1423        }
1424
1425        return rc;
1426
1427err:
1428        for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
1429                irq = &adapter->irq_tbl[k];
1430                free_irq(irq->vector, irq->data);
1431        }
1432
1433        return rc;
1434}
1435
1436static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
1437{
1438        struct ena_irq *irq;
1439
1440        irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1441        synchronize_irq(irq->vector);
1442        irq_set_affinity_hint(irq->vector, NULL);
1443        free_irq(irq->vector, irq->data);
1444}
1445
1446static void ena_free_io_irq(struct ena_adapter *adapter)
1447{
1448        struct ena_irq *irq;
1449        int i;
1450
1451#ifdef CONFIG_RFS_ACCEL
1452        if (adapter->msix_vecs >= 1) {
1453                free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
1454                adapter->netdev->rx_cpu_rmap = NULL;
1455        }
1456#endif /* CONFIG_RFS_ACCEL */
1457
1458        for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1459                irq = &adapter->irq_tbl[i];
1460                irq_set_affinity_hint(irq->vector, NULL);
1461                free_irq(irq->vector, irq->data);
1462        }
1463}
1464
1465static void ena_disable_msix(struct ena_adapter *adapter)
1466{
1467        if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
1468                pci_free_irq_vectors(adapter->pdev);
1469}
1470
1471static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
1472{
1473        int i;
1474
1475        if (!netif_running(adapter->netdev))
1476                return;
1477
1478        for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++)
1479                synchronize_irq(adapter->irq_tbl[i].vector);
1480}
1481
1482static void ena_del_napi(struct ena_adapter *adapter)
1483{
1484        int i;
1485
1486        for (i = 0; i < adapter->num_queues; i++)
1487                netif_napi_del(&adapter->ena_napi[i].napi);
1488}
1489
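/* Register a NAPI context for every I/O queue pair and point it at its TX and
 * RX rings; ENA_NAPI_BUDGET bounds the work done per poll invocation.
 */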
1490static void ena_init_napi(struct ena_adapter *adapter)
1491{
1492        struct ena_napi *napi;
1493        int i;
1494
1495        for (i = 0; i < adapter->num_queues; i++) {
1496                napi = &adapter->ena_napi[i];
1497
1498                netif_napi_add(adapter->netdev,
1499                               &adapter->ena_napi[i].napi,
1500                               ena_io_poll,
1501                               ENA_NAPI_BUDGET);
1502                napi->rx_ring = &adapter->rx_ring[i];
1503                napi->tx_ring = &adapter->tx_ring[i];
1504                napi->qid = i;
1505        }
1506}
1507
1508static void ena_napi_disable_all(struct ena_adapter *adapter)
1509{
1510        int i;
1511
1512        for (i = 0; i < adapter->num_queues; i++)
1513                napi_disable(&adapter->ena_napi[i].napi);
1514}
1515
1516static void ena_napi_enable_all(struct ena_adapter *adapter)
1517{
1518        int i;
1519
1520        for (i = 0; i < adapter->num_queues; i++)
1521                napi_enable(&adapter->ena_napi[i].napi);
1522}
1523
1524static void ena_restore_ethtool_params(struct ena_adapter *adapter)
1525{
1526        adapter->tx_usecs = 0;
1527        adapter->rx_usecs = 0;
1528        adapter->tx_frames = 1;
1529        adapter->rx_frames = 1;
1530}
1531
1532/* Configure Rx flow steering (RSS) */
1533static int ena_rss_configure(struct ena_adapter *adapter)
1534{
1535        struct ena_com_dev *ena_dev = adapter->ena_dev;
1536        int rc;
1537
1538        /* In case the RSS table wasn't initialized by probe */
1539        if (!ena_dev->rss.tbl_log_size) {
1540                rc = ena_rss_init_default(adapter);
1541                if (rc && (rc != -EOPNOTSUPP)) {
1542                        netif_err(adapter, ifup, adapter->netdev,
1543                                  "Failed to init RSS rc: %d\n", rc);
1544                        return rc;
1545                }
1546        }
1547
1548        /* Set indirect table */
1549        rc = ena_com_indirect_table_set(ena_dev);
1550        if (unlikely(rc && rc != -EOPNOTSUPP))
1551                return rc;
1552
1553        /* Configure hash function (if supported) */
1554        rc = ena_com_set_hash_function(ena_dev);
1555        if (unlikely(rc && (rc != -EOPNOTSUPP)))
1556                return rc;
1557
1558        /* Configure hash inputs (if supported) */
1559        rc = ena_com_set_hash_ctrl(ena_dev);
1560        if (unlikely(rc && (rc != -EOPNOTSUPP)))
1561                return rc;
1562
1563        return 0;
1564}
1565
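/* Final stage of interface bring-up: configure RSS, register NAPI, refill all
 * RX rings, start the TX queues and unmask the completion-queue interrupts.
 * NAPI is scheduled once in case packets arrived while it was disabled.
 */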
1566static int ena_up_complete(struct ena_adapter *adapter)
1567{
1568        int rc, i;
1569
1570        rc = ena_rss_configure(adapter);
1571        if (rc)
1572                return rc;
1573
1574        ena_init_napi(adapter);
1575
1576        ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
1577
1578        ena_refill_all_rx_bufs(adapter);
1579
1580        /* enable transmits */
1581        netif_tx_start_all_queues(adapter->netdev);
1582
1583        ena_restore_ethtool_params(adapter);
1584
1585        ena_napi_enable_all(adapter);
1586
1587        /* Enable completion queues interrupt */
1588        for (i = 0; i < adapter->num_queues; i++)
1589                ena_unmask_interrupt(&adapter->tx_ring[i],
1590                                     &adapter->rx_ring[i]);
1591
1592        /* schedule napi in case we had pending packets
1593         * from the last time napi was disabled
1594         */
1595        for (i = 0; i < adapter->num_queues; i++)
1596                napi_schedule(&adapter->ena_napi[i].napi);
1597
1598        return 0;
1599}
1600
1601static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
1602{
1603        struct ena_com_create_io_ctx ctx = { 0 };
1604        struct ena_com_dev *ena_dev;
1605        struct ena_ring *tx_ring;
1606        u32 msix_vector;
1607        u16 ena_qid;
1608        int rc;
1609
1610        ena_dev = adapter->ena_dev;
1611
1612        tx_ring = &adapter->tx_ring[qid];
1613        msix_vector = ENA_IO_IRQ_IDX(qid);
1614        ena_qid = ENA_IO_TXQ_IDX(qid);
1615
1616        ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1617        ctx.qid = ena_qid;
1618        ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1619        ctx.msix_vector = msix_vector;
1620        ctx.queue_size = adapter->tx_ring_size;
1621        ctx.numa_node = cpu_to_node(tx_ring->cpu);
1622
1623        rc = ena_com_create_io_queue(ena_dev, &ctx);
1624        if (rc) {
1625                netif_err(adapter, ifup, adapter->netdev,
1626                          "Failed to create I/O TX queue num %d rc: %d\n",
1627                          qid, rc);
1628                return rc;
1629        }
1630
1631        rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1632                                     &tx_ring->ena_com_io_sq,
1633                                     &tx_ring->ena_com_io_cq);
1634        if (rc) {
1635                netif_err(adapter, ifup, adapter->netdev,
1636                          "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
1637                          qid, rc);
1638                ena_com_destroy_io_queue(ena_dev, ena_qid);
1639                return rc;
1640        }
1641
1642        ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
1643        return rc;
1644}
1645
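/* Create the device-side TX submission/completion queue for every channel; on
 * failure, destroy the queues that were already created before returning.
 */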
1646static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
1647{
1648        struct ena_com_dev *ena_dev = adapter->ena_dev;
1649        int rc, i;
1650
1651        for (i = 0; i < adapter->num_queues; i++) {
1652                rc = ena_create_io_tx_queue(adapter, i);
1653                if (rc)
1654                        goto create_err;
1655        }
1656
1657        return 0;
1658
1659create_err:
1660        while (i--)
1661                ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1662
1663        return rc;
1664}
1665
1666static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
1667{
1668        struct ena_com_dev *ena_dev;
1669        struct ena_com_create_io_ctx ctx = { 0 };
1670        struct ena_ring *rx_ring;
1671        u32 msix_vector;
1672        u16 ena_qid;
1673        int rc;
1674
1675        ena_dev = adapter->ena_dev;
1676
1677        rx_ring = &adapter->rx_ring[qid];
1678        msix_vector = ENA_IO_IRQ_IDX(qid);
1679        ena_qid = ENA_IO_RXQ_IDX(qid);
1680
1681        ctx.qid = ena_qid;
1682        ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1683        ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1684        ctx.msix_vector = msix_vector;
1685        ctx.queue_size = adapter->rx_ring_size;
1686        ctx.numa_node = cpu_to_node(rx_ring->cpu);
1687
1688        rc = ena_com_create_io_queue(ena_dev, &ctx);
1689        if (rc) {
1690                netif_err(adapter, ifup, adapter->netdev,
1691                          "Failed to create I/O RX queue num %d rc: %d\n",
1692                          qid, rc);
1693                return rc;
1694        }
1695
1696        rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1697                                     &rx_ring->ena_com_io_sq,
1698                                     &rx_ring->ena_com_io_cq);
1699        if (rc) {
1700                netif_err(adapter, ifup, adapter->netdev,
1701                          "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
1702                          qid, rc);
1703                ena_com_destroy_io_queue(ena_dev, ena_qid);
1704                return rc;
1705        }
1706
1707        ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
1708
1709        return rc;
1710}
1711
1712static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
1713{
1714        struct ena_com_dev *ena_dev = adapter->ena_dev;
1715        int rc, i;
1716
1717        for (i = 0; i < adapter->num_queues; i++) {
1718                rc = ena_create_io_rx_queue(adapter, i);
1719                if (rc)
1720                        goto create_err;
1721        }
1722
1723        return 0;
1724
1725create_err:
1726        while (i--)
1727                ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1728
1729        return rc;
1730}
1731
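/* Bring-up sequence: set up and request the I/O IRQs, allocate host-side TX
 * and RX resources, create the device I/O queues and finish via
 * ena_up_complete().  The goto labels below unwind the steps in reverse order
 * on failure.
 */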
1732static int ena_up(struct ena_adapter *adapter)
1733{
1734        int rc;
1735
1736        netdev_dbg(adapter->netdev, "%s\n", __func__);
1737
1738        ena_setup_io_intr(adapter);
1739
1740        rc = ena_request_io_irq(adapter);
1741        if (rc)
1742                goto err_req_irq;
1743
1744        /* allocate transmit descriptors */
1745        rc = ena_setup_all_tx_resources(adapter);
1746        if (rc)
1747                goto err_setup_tx;
1748
1749        /* allocate receive descriptors */
1750        rc = ena_setup_all_rx_resources(adapter);
1751        if (rc)
1752                goto err_setup_rx;
1753
1754        /* Create TX queues */
1755        rc = ena_create_all_io_tx_queues(adapter);
1756        if (rc)
1757                goto err_create_tx_queues;
1758
1759        /* Create RX queues */
1760        rc = ena_create_all_io_rx_queues(adapter);
1761        if (rc)
1762                goto err_create_rx_queues;
1763
1764        rc = ena_up_complete(adapter);
1765        if (rc)
1766                goto err_up;
1767
1768        if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
1769                netif_carrier_on(adapter->netdev);
1770
1771        u64_stats_update_begin(&adapter->syncp);
1772        adapter->dev_stats.interface_up++;
1773        u64_stats_update_end(&adapter->syncp);
1774
1775        set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
1776
1777        return rc;
1778
1779err_up:
1780        ena_destroy_all_rx_queues(adapter);
1781err_create_rx_queues:
1782        ena_destroy_all_tx_queues(adapter);
1783err_create_tx_queues:
1784        ena_free_all_io_rx_resources(adapter);
1785err_setup_rx:
1786        ena_free_all_io_tx_resources(adapter);
1787err_setup_tx:
1788        ena_free_io_irq(adapter);
1789err_req_irq:
1790
1791        return rc;
1792}
1793
1794static void ena_down(struct ena_adapter *adapter)
1795{
1796        netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
1797
1798        clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
1799
1800        u64_stats_update_begin(&adapter->syncp);
1801        adapter->dev_stats.interface_down++;
1802        u64_stats_update_end(&adapter->syncp);
1803
1804        netif_carrier_off(adapter->netdev);
1805        netif_tx_disable(adapter->netdev);
1806
1807        /* After this point the napi handler won't enable the tx queue */
1808        ena_napi_disable_all(adapter);
1809
1810        /* After destroying the I/O queues there won't be any new interrupts */
1811
1812        if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
1813                int rc;
1814
1815                rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
1816                if (rc)
1817                        dev_err(&adapter->pdev->dev, "Device reset failed\n");
1818        }
1819
1820        ena_destroy_all_io_queues(adapter);
1821
1822        ena_disable_io_intr_sync(adapter);
1823        ena_free_io_irq(adapter);
1824        ena_del_napi(adapter);
1825
1826        ena_free_all_tx_bufs(adapter);
1827        ena_free_all_rx_bufs(adapter);
1828        ena_free_all_io_tx_resources(adapter);
1829        ena_free_all_io_rx_resources(adapter);
1830}
1831
1832/* ena_open - Called when a network interface is made active
1833 * @netdev: network interface device structure
1834 *
1835 * Returns 0 on success, negative value on failure
1836 *
1837 * The open entry point is called when a network interface is made
1838 * active by the system (IFF_UP).  At this point all resources needed
1839 * for transmit and receive operations are allocated, the interrupt
1840 * handler is registered with the OS, the watchdog timer is started,
1841 * and the stack is notified that the interface is ready.
1842 */
1843static int ena_open(struct net_device *netdev)
1844{
1845        struct ena_adapter *adapter = netdev_priv(netdev);
1846        int rc;
1847
1848        /* Notify the stack of the actual queue counts. */
1849        rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues);
1850        if (rc) {
1851                netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
1852                return rc;
1853        }
1854
1855        rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues);
1856        if (rc) {
1857                netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
1858                return rc;
1859        }
1860
1861        rc = ena_up(adapter);
1862        if (rc)
1863                return rc;
1864
1865        return rc;
1866}
1867
1868/* ena_close - Disables a network interface
1869 * @netdev: network interface device structure
1870 *
1871 * Returns 0, this is not allowed to fail
1872 *
1873 * The close entry point is called when an interface is de-activated
1874 * by the OS.  The hardware is still under the drivers control, but
1875 * needs to be disabled.  A global MAC reset is issued to stop the
1876 * hardware, and all transmit and receive resources are freed.
1877 */
1878static int ena_close(struct net_device *netdev)
1879{
1880        struct ena_adapter *adapter = netdev_priv(netdev);
1881
1882        netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
1883
1884        if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
1885                ena_down(adapter);
1886
1887        return 0;
1888}
1889
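/* Fill the TX metadata from the skb: choose between TSO and partial L4
 * checksum offload, and record the L3/L4 protocols and header lengths the
 * device needs.  For TSO over IPv4 the L3 checksum offload is enabled as well.
 */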
1890static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
1891{
1892        u32 mss = skb_shinfo(skb)->gso_size;
1893        struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
1894        u8 l4_protocol = 0;
1895
1896        if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
1897                ena_tx_ctx->l4_csum_enable = 1;
1898                if (mss) {
1899                        ena_tx_ctx->tso_enable = 1;
1900                        ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
1901                        ena_tx_ctx->l4_csum_partial = 0;
1902                } else {
1903                        ena_tx_ctx->tso_enable = 0;
1904                        ena_meta->l4_hdr_len = 0;
1905                        ena_tx_ctx->l4_csum_partial = 1;
1906                }
1907
1908                switch (ip_hdr(skb)->version) {
1909                case IPVERSION:
1910                        ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
1911                        if (ip_hdr(skb)->frag_off & htons(IP_DF))
1912                                ena_tx_ctx->df = 1;
1913                        if (mss)
1914                                ena_tx_ctx->l3_csum_enable = 1;
1915                        l4_protocol = ip_hdr(skb)->protocol;
1916                        break;
1917                case 6:
1918                        ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
1919                        l4_protocol = ipv6_hdr(skb)->nexthdr;
1920                        break;
1921                default:
1922                        break;
1923                }
1924
1925                if (l4_protocol == IPPROTO_TCP)
1926                        ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
1927                else
1928                        ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
1929
1930                ena_meta->mss = mss;
1931                ena_meta->l3_hdr_len = skb_network_header_len(skb);
1932                ena_meta->l3_hdr_offset = skb_network_offset(skb);
1933                ena_tx_ctx->meta_valid = 1;
1934
1935        } else {
1936                ena_tx_ctx->meta_valid = 0;
1937        }
1938}
1939
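/* The device accepts at most sgl_size buffers per packet.  If the skb carries
 * too many fragments (or exactly sgl_size with an over-sized linear header),
 * linearize it so that it fits; the ring statistics count how often this
 * happens and how often it fails.
 */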
1940static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
1941                                       struct sk_buff *skb)
1942{
1943        int num_frags, header_len, rc;
1944
1945        num_frags = skb_shinfo(skb)->nr_frags;
1946        header_len = skb_headlen(skb);
1947
1948        if (num_frags < tx_ring->sgl_size)
1949                return 0;
1950
1951        if ((num_frags == tx_ring->sgl_size) &&
1952            (header_len < tx_ring->tx_max_header_size))
1953                return 0;
1954
1955        u64_stats_update_begin(&tx_ring->syncp);
1956        tx_ring->tx_stats.linearize++;
1957        u64_stats_update_end(&tx_ring->syncp);
1958
1959        rc = skb_linearize(skb);
1960        if (unlikely(rc)) {
1961                u64_stats_update_begin(&tx_ring->syncp);
1962                tx_ring->tx_stats.linearize_failed++;
1963                u64_stats_update_end(&tx_ring->syncp);
1964        }
1965
1966        return rc;
1967}
1968
1969/* Called with netif_tx_lock. */
1970static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
1971{
1972        struct ena_adapter *adapter = netdev_priv(dev);
1973        struct ena_tx_buffer *tx_info;
1974        struct ena_com_tx_ctx ena_tx_ctx;
1975        struct ena_ring *tx_ring;
1976        struct netdev_queue *txq;
1977        struct ena_com_buf *ena_buf;
1978        void *push_hdr;
1979        u32 len, last_frag;
1980        u16 next_to_use;
1981        u16 req_id;
1982        u16 push_len;
1983        u16 header_len;
1984        dma_addr_t dma;
1985        int qid, rc, nb_hw_desc;
1986        int i = -1;
1987
1988        netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
1989        /* Determine which tx ring the skb will be placed on */
1990        qid = skb_get_queue_mapping(skb);
1991        tx_ring = &adapter->tx_ring[qid];
1992        txq = netdev_get_tx_queue(dev, qid);
1993
1994        rc = ena_check_and_linearize_skb(tx_ring, skb);
1995        if (unlikely(rc))
1996                goto error_drop_packet;
1997
1998        skb_tx_timestamp(skb);
1999        len = skb_headlen(skb);
2000
2001        next_to_use = tx_ring->next_to_use;
2002        req_id = tx_ring->free_tx_ids[next_to_use];
2003        tx_info = &tx_ring->tx_buffer_info[req_id];
2004        tx_info->num_of_bufs = 0;
2005
2006        WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
2007        ena_buf = tx_info->bufs;
2008        tx_info->skb = skb;
2009
2010        if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2011                /* prepare the push buffer */
2012                push_len = min_t(u32, len, tx_ring->tx_max_header_size);
2013                header_len = push_len;
2014                push_hdr = skb->data;
2015        } else {
2016                push_len = 0;
2017                header_len = min_t(u32, len, tx_ring->tx_max_header_size);
2018                push_hdr = NULL;
2019        }
2020
2021        netif_dbg(adapter, tx_queued, dev,
2022                  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2023                  push_hdr, push_len);
2024
2025        if (len > push_len) {
2026                dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2027                                     len - push_len, DMA_TO_DEVICE);
2028                if (dma_mapping_error(tx_ring->dev, dma))
2029                        goto error_report_dma_error;
2030
2031                ena_buf->paddr = dma;
2032                ena_buf->len = len - push_len;
2033
2034                ena_buf++;
2035                tx_info->num_of_bufs++;
2036        }
2037
2038        last_frag = skb_shinfo(skb)->nr_frags;
2039
2040        for (i = 0; i < last_frag; i++) {
2041                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2042
2043                len = skb_frag_size(frag);
2044                dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
2045                                       DMA_TO_DEVICE);
2046                if (dma_mapping_error(tx_ring->dev, dma))
2047                        goto error_report_dma_error;
2048
2049                ena_buf->paddr = dma;
2050                ena_buf->len = len;
2051                ena_buf++;
2052        }
2053
2054        tx_info->num_of_bufs += last_frag;
2055
2056        memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
2057        ena_tx_ctx.ena_bufs = tx_info->bufs;
2058        ena_tx_ctx.push_header = push_hdr;
2059        ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
2060        ena_tx_ctx.req_id = req_id;
2061        ena_tx_ctx.header_len = header_len;
2062
2063        /* set flags and meta data */
2064        ena_tx_csum(&ena_tx_ctx, skb);
2065
2066        /* prepare the packet's descriptors to dma engine */
2067        rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
2068                                &nb_hw_desc);
2069
2070        if (unlikely(rc)) {
2071                netif_err(adapter, tx_queued, dev,
2072                          "failed to prepare tx bufs\n");
2073                u64_stats_update_begin(&tx_ring->syncp);
2074                tx_ring->tx_stats.queue_stop++;
2075                tx_ring->tx_stats.prepare_ctx_err++;
2076                u64_stats_update_end(&tx_ring->syncp);
2077                netif_tx_stop_queue(txq);
2078                goto error_unmap_dma;
2079        }
2080
2081        netdev_tx_sent_queue(txq, skb->len);
2082
2083        u64_stats_update_begin(&tx_ring->syncp);
2084        tx_ring->tx_stats.cnt++;
2085        tx_ring->tx_stats.bytes += skb->len;
2086        u64_stats_update_end(&tx_ring->syncp);
2087
2088        tx_info->tx_descs = nb_hw_desc;
2089        tx_info->last_jiffies = jiffies;
2090        tx_info->print_once = 0;
2091
2092        tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
2093                tx_ring->ring_size);
2094
2095        /* This WMB is aimed to:
2096         * 1 - perform an smp barrier before reading next_to_completion
2097         * 2 - make sure the descriptors were written before triggering the doorbell
2098         */
2099        wmb();
2100
2101        /* stop the queue when no more space is available; a packet can require
2102         * up to sgl_size + 2 descriptors: one for the meta descriptor and one for
2103         * the header (if the header is larger than tx_max_header_size).
2104         */
2105        if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) <
2106                     (tx_ring->sgl_size + 2))) {
2107                netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
2108                          __func__, qid);
2109
2110                netif_tx_stop_queue(txq);
2111                u64_stats_update_begin(&tx_ring->syncp);
2112                tx_ring->tx_stats.queue_stop++;
2113                u64_stats_update_end(&tx_ring->syncp);
2114
2115                /* There is a rare condition where this function decides to
2116                 * stop the queue but meanwhile clean_tx_irq updates
2117                 * next_to_completion and terminates.
2118                 * The queue would then remain stopped forever.
2119                 * To solve this issue this function performs an rmb, checks
2120                 * the wakeup condition and wakes up the queue if needed.
2121                 */
2122                smp_rmb();
2123
2124                if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq)
2125                                > ENA_TX_WAKEUP_THRESH) {
2126                        netif_tx_wake_queue(txq);
2127                        u64_stats_update_begin(&tx_ring->syncp);
2128                        tx_ring->tx_stats.queue_wakeup++;
2129                        u64_stats_update_end(&tx_ring->syncp);
2130                }
2131        }
2132
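        /* Write the doorbell only when the stack has no more skbs queued for
         * us (xmit_more is clear) or when the queue was just stopped; this
         * batches doorbell writes across a burst of packets.
         */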
2133        if (netif_xmit_stopped(txq) || !skb->xmit_more) {
2134                /* trigger the dma engine */
2135                ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
2136                u64_stats_update_begin(&tx_ring->syncp);
2137                tx_ring->tx_stats.doorbells++;
2138                u64_stats_update_end(&tx_ring->syncp);
2139        }
2140
2141        return NETDEV_TX_OK;
2142
2143error_report_dma_error:
2144        u64_stats_update_begin(&tx_ring->syncp);
2145        tx_ring->tx_stats.dma_mapping_err++;
2146        u64_stats_update_end(&tx_ring->syncp);
2147        netdev_warn(adapter->netdev, "failed to map skb\n");
2148
2149        tx_info->skb = NULL;
2150
2151error_unmap_dma:
2152        if (i >= 0) {
2153                /* save value of frag that failed */
2154                last_frag = i;
2155
2156                /* start back at beginning and unmap skb */
2157                tx_info->skb = NULL;
2158                ena_buf = tx_info->bufs;
2159                dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
2160                                 dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
2161
2162                /* unmap remaining mapped pages */
2163                for (i = 0; i < last_frag; i++) {
2164                        ena_buf++;
2165                        dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
2166                                       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
2167                }
2168        }
2169
2170error_drop_packet:
2171
2172        dev_kfree_skb(skb);
2173        return NETDEV_TX_OK;
2174}
2175
2176#ifdef CONFIG_NET_POLL_CONTROLLER
2177static void ena_netpoll(struct net_device *netdev)
2178{
2179        struct ena_adapter *adapter = netdev_priv(netdev);
2180        int i;
2181
2182        /* Don't schedule NAPI if the driver is in the middle of a reset
2183         * or the netdev is down.
2184         */
2185
2186        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) ||
2187            test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
2188                return;
2189
2190        for (i = 0; i < adapter->num_queues; i++)
2191                napi_schedule(&adapter->ena_napi[i].napi);
2192}
2193#endif /* CONFIG_NET_POLL_CONTROLLER */
2194
2195static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
2196                            void *accel_priv, select_queue_fallback_t fallback)
2197{
2198        u16 qid;
2199        /* We suspect that this is good for in-kernel network services that
2200         * want to loop incoming skb rx to tx in normal user-generated traffic;
2201         * most probably we will not get to this.
2202         */
2203        if (skb_rx_queue_recorded(skb))
2204                qid = skb_get_rx_queue(skb);
2205        else
2206                qid = fallback(dev, skb);
2207
2208        return qid;
2209}
2210
2211static void ena_config_host_info(struct ena_com_dev *ena_dev)
2212{
2213        struct ena_admin_host_info *host_info;
2214        int rc;
2215
2216        /* Allocate only the host info */
2217        rc = ena_com_allocate_host_info(ena_dev);
2218        if (rc) {
2219                pr_err("Cannot allocate host info\n");
2220                return;
2221        }
2222
2223        host_info = ena_dev->host_attr.host_info;
2224
2225        host_info->os_type = ENA_ADMIN_OS_LINUX;
2226        host_info->kernel_ver = LINUX_VERSION_CODE;
2227        strncpy(host_info->kernel_ver_str, utsname()->version,
2228                sizeof(host_info->kernel_ver_str) - 1);
2229        host_info->os_dist = 0;
2230        strncpy(host_info->os_dist_str, utsname()->release,
2231                sizeof(host_info->os_dist_str) - 1);
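        /* Pack the driver version into a single u32: the major number sits in
         * the low bits and the minor/sub-minor numbers are shifted into the
         * higher fields defined by the admin interface.
         */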
2232        host_info->driver_version =
2233                (DRV_MODULE_VER_MAJOR) |
2234                (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2235                (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
2236
2237        rc = ena_com_set_host_attributes(ena_dev);
2238        if (rc) {
2239                if (rc == -EOPNOTSUPP)
2240                        pr_warn("Cannot set host attributes\n");
2241                else
2242                        pr_err("Cannot set host attributes\n");
2243
2244                goto err;
2245        }
2246
2247        return;
2248
2249err:
2250        ena_com_delete_host_info(ena_dev);
2251}
2252
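/* The debug area holds a snapshot of the ethtool statistics (a 32-byte name
 * string plus a u64 value per counter) that ena_dump_stats_to_buf() refreshes
 * periodically for the device to read.
 */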
2253static void ena_config_debug_area(struct ena_adapter *adapter)
2254{
2255        u32 debug_area_size;
2256        int rc, ss_count;
2257
2258        ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
2259        if (ss_count <= 0) {
2260                netif_err(adapter, drv, adapter->netdev,
2261                          "SS count is not positive\n");
2262                return;
2263        }
2264
2265        /* allocate 32 bytes for each string and 64 bits for the value */
2266        debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
2267
2268        rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
2269        if (rc) {
2270                pr_err("Cannot allocate debug area\n");
2271                return;
2272        }
2273
2274        rc = ena_com_set_host_attributes(adapter->ena_dev);
2275        if (rc) {
2276                if (rc == -EOPNOTSUPP)
2277                        netif_warn(adapter, drv, adapter->netdev,
2278                                   "Cannot set host attributes\n");
2279                else
2280                        netif_err(adapter, drv, adapter->netdev,
2281                                  "Cannot set host attributes\n");
2282                goto err;
2283        }
2284
2285        return;
2286err:
2287        ena_com_delete_debug_area(adapter->ena_dev);
2288}
2289
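/* ndo_get_stats64 handler: aggregate the per-ring TX/RX packet and byte
 * counters under their u64_stats seqcounts and report the adapter-level
 * rx_drops count.
 */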
2290static void ena_get_stats64(struct net_device *netdev,
2291                            struct rtnl_link_stats64 *stats)
2292{
2293        struct ena_adapter *adapter = netdev_priv(netdev);
2294        struct ena_ring *rx_ring, *tx_ring;
2295        unsigned int start;
2296        u64 rx_drops;
2297        int i;
2298
2299        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2300                return;
2301
2302        for (i = 0; i < adapter->num_queues; i++) {
2303                u64 bytes, packets;
2304
2305                tx_ring = &adapter->tx_ring[i];
2306
2307                do {
2308                        start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
2309                        packets = tx_ring->tx_stats.cnt;
2310                        bytes = tx_ring->tx_stats.bytes;
2311                } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
2312
2313                stats->tx_packets += packets;
2314                stats->tx_bytes += bytes;
2315
2316                rx_ring = &adapter->rx_ring[i];
2317
2318                do {
2319                        start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
2320                        packets = rx_ring->rx_stats.cnt;
2321                        bytes = rx_ring->rx_stats.bytes;
2322                } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
2323
2324                stats->rx_packets += packets;
2325                stats->rx_bytes += bytes;
2326        }
2327
2328        do {
2329                start = u64_stats_fetch_begin_irq(&adapter->syncp);
2330                rx_drops = adapter->dev_stats.rx_drops;
2331        } while (u64_stats_fetch_retry_irq(&adapter->syncp, start));
2332
2333        stats->rx_dropped = rx_drops;
2334
2335        stats->multicast = 0;
2336        stats->collisions = 0;
2337
2338        stats->rx_length_errors = 0;
2339        stats->rx_crc_errors = 0;
2340        stats->rx_frame_errors = 0;
2341        stats->rx_fifo_errors = 0;
2342        stats->rx_missed_errors = 0;
2343        stats->tx_window_errors = 0;
2344
2345        stats->rx_errors = 0;
2346        stats->tx_errors = 0;
2347}
2348
2349static const struct net_device_ops ena_netdev_ops = {
2350        .ndo_open               = ena_open,
2351        .ndo_stop               = ena_close,
2352        .ndo_start_xmit         = ena_start_xmit,
2353        .ndo_select_queue       = ena_select_queue,
2354        .ndo_get_stats64        = ena_get_stats64,
2355        .ndo_tx_timeout         = ena_tx_timeout,
2356        .ndo_change_mtu         = ena_change_mtu,
2357        .ndo_set_mac_address    = NULL,
2358        .ndo_validate_addr      = eth_validate_addr,
2359#ifdef CONFIG_NET_POLL_CONTROLLER
2360        .ndo_poll_controller    = ena_netpoll,
2361#endif /* CONFIG_NET_POLL_CONTROLLER */
2362};
2363
2364static void ena_device_io_suspend(struct work_struct *work)
2365{
2366        struct ena_adapter *adapter =
2367                container_of(work, struct ena_adapter, suspend_io_task);
2368        struct net_device *netdev = adapter->netdev;
2369
2370        /* ena_napi_disable_all disables only the IO handling.
2371         * We are still subject to AENQ keep alive watchdog.
2372         */
2373        u64_stats_update_begin(&adapter->syncp);
2374        adapter->dev_stats.io_suspend++;
2375        u64_stats_update_end(&adapter->syncp);
2376        ena_napi_disable_all(adapter);
2377        netif_tx_lock(netdev);
2378        netif_device_detach(netdev);
2379        netif_tx_unlock(netdev);
2380}
2381
2382static void ena_device_io_resume(struct work_struct *work)
2383{
2384        struct ena_adapter *adapter =
2385                container_of(work, struct ena_adapter, resume_io_task);
2386        struct net_device *netdev = adapter->netdev;
2387
2388        u64_stats_update_begin(&adapter->syncp);
2389        adapter->dev_stats.io_resume++;
2390        u64_stats_update_end(&adapter->syncp);
2391
2392        netif_device_attach(netdev);
2393        ena_napi_enable_all(adapter);
2394}
2395
2396static int ena_device_validate_params(struct ena_adapter *adapter,
2397                                      struct ena_com_dev_get_features_ctx *get_feat_ctx)
2398{
2399        struct net_device *netdev = adapter->netdev;
2400        int rc;
2401
2402        rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
2403                              adapter->mac_addr);
2404        if (!rc) {
2405                netif_err(adapter, drv, netdev,
2406                          "Error, MAC addresses are different\n");
2407                return -EINVAL;
2408        }
2409
2410        if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) ||
2411            (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) {
2412                netif_err(adapter, drv, netdev,
2413                          "Error, device doesn't support enough queues\n");
2414                return -EINVAL;
2415        }
2416
2417        if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
2418                netif_err(adapter, drv, netdev,
2419                          "Error, device max mtu is smaller than netdev MTU\n");
2420                return -EINVAL;
2421        }
2422
2423        return 0;
2424}
2425
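/* Early device initialization over the admin queue in polling mode: reset the
 * device, validate its version, set the DMA masks from the reported DMA
 * width, publish host info and enable the supported AENQ event groups.
 */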
2426static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
2427                           struct ena_com_dev_get_features_ctx *get_feat_ctx,
2428                           bool *wd_state)
2429{
2430        struct device *dev = &pdev->dev;
2431        bool readless_supported;
2432        u32 aenq_groups;
2433        int dma_width;
2434        int rc;
2435
2436        rc = ena_com_mmio_reg_read_request_init(ena_dev);
2437        if (rc) {
2438                dev_err(dev, "failed to init mmio read less\n");
2439                return rc;
2440        }
2441
2442        /* The PCIe configuration space revision ID indicates whether mmio
2443         * register read is disabled
2444         */
2445        readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
2446        ena_com_set_mmio_read_mode(ena_dev, readless_supported);
2447
2448        rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
2449        if (rc) {
2450                dev_err(dev, "Can not reset device\n");
2451                goto err_mmio_read_less;
2452        }
2453
2454        rc = ena_com_validate_version(ena_dev);
2455        if (rc) {
2456                dev_err(dev, "device version is too low\n");
2457                goto err_mmio_read_less;
2458        }
2459
2460        dma_width = ena_com_get_dma_width(ena_dev);
2461        if (dma_width < 0) {
2462                dev_err(dev, "Invalid dma width value %d\n", dma_width);
2463                rc = dma_width;
2464                goto err_mmio_read_less;
2465        }
2466
2467        rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
2468        if (rc) {
2469                dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc);
2470                goto err_mmio_read_less;
2471        }
2472
2473        rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
2474        if (rc) {
2475                dev_err(dev, "pci_set_consistent_dma_mask failed 0x%x\n",
2476                        rc);
2477                goto err_mmio_read_less;
2478        }
2479
2480        /* ENA admin level init */
2481        rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
2482        if (rc) {
2483                dev_err(dev,
2484                        "Can not initialize ena admin queue with device\n");
2485                goto err_mmio_read_less;
2486        }
2487
2488        /* To enable the msix interrupts the driver needs to know the number
2489         * of queues. So the driver uses polling mode to retrieve this
2490         * information
2491         */
2492        ena_com_set_admin_polling_mode(ena_dev, true);
2493
2494        ena_config_host_info(ena_dev);
2495
2496        /* Get Device Attributes */
2497        rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
2498        if (rc) {
2499                dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
2500                goto err_admin_init;
2501        }
2502
2503        /* Try to turn on all the available AENQ groups */
2504        aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
2505                BIT(ENA_ADMIN_FATAL_ERROR) |
2506                BIT(ENA_ADMIN_WARNING) |
2507                BIT(ENA_ADMIN_NOTIFICATION) |
2508                BIT(ENA_ADMIN_KEEP_ALIVE);
2509
2510        aenq_groups &= get_feat_ctx->aenq.supported_groups;
2511
2512        rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
2513        if (rc) {
2514                dev_err(dev, "Cannot configure aenq groups, rc: %d\n", rc);
2515                goto err_admin_init;
2516        }
2517
2518        *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
2519
2520        return 0;
2521
2522err_admin_init:
2523        ena_com_delete_host_info(ena_dev);
2524        ena_com_admin_destroy(ena_dev);
2525err_mmio_read_less:
2526        ena_com_mmio_reg_read_request_destroy(ena_dev);
2527
2528        return rc;
2529}
2530
2531static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
2532                                                    int io_vectors)
2533{
2534        struct ena_com_dev *ena_dev = adapter->ena_dev;
2535        struct device *dev = &adapter->pdev->dev;
2536        int rc;
2537
2538        rc = ena_enable_msix(adapter, io_vectors);
2539        if (rc) {
2540                dev_err(dev, "Can not reserve msix vectors\n");
2541                return rc;
2542        }
2543
2544        ena_setup_mgmnt_intr(adapter);
2545
2546        rc = ena_request_mgmnt_irq(adapter);
2547        if (rc) {
2548                dev_err(dev, "Can not setup management interrupts\n");
2549                goto err_disable_msix;
2550        }
2551
2552        ena_com_set_admin_polling_mode(ena_dev, false);
2553
2554        ena_com_admin_aenq_enable(ena_dev);
2555
2556        return 0;
2557
2558err_disable_msix:
2559        ena_disable_msix(adapter);
2560
2561        return rc;
2562}
2563
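/* Reset worker: under rtnl_lock, tear the device down (close the interface,
 * free the management IRQ, destroy the admin queue), re-run the init sequence
 * and, if the interface was up before the reset, bring it back up.
 */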
2564static void ena_fw_reset_device(struct work_struct *work)
2565{
2566        struct ena_com_dev_get_features_ctx get_feat_ctx;
2567        struct ena_adapter *adapter =
2568                container_of(work, struct ena_adapter, reset_task);
2569        struct net_device *netdev = adapter->netdev;
2570        struct ena_com_dev *ena_dev = adapter->ena_dev;
2571        struct pci_dev *pdev = adapter->pdev;
2572        bool dev_up, wd_state;
2573        int rc;
2574
2575        if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2576                dev_err(&pdev->dev,
2577                        "device reset scheduled while reset bit is off\n");
2578                return;
2579        }
2580
2581        netif_carrier_off(netdev);
2582
2583        del_timer_sync(&adapter->timer_service);
2584
2585        rtnl_lock();
2586
2587        dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2588        ena_com_set_admin_running_state(ena_dev, false);
2589
2590        /* After calling ena_close the tx queues and the napi
2591         * are disabled so no one can interfere or touch the
2592         * data structures
2593         */
2594        ena_close(netdev);
2595
2596        ena_free_mgmnt_irq(adapter);
2597
2598        ena_disable_msix(adapter);
2599
2600        ena_com_abort_admin_commands(ena_dev);
2601
2602        ena_com_wait_for_abort_completion(ena_dev);
2603
2604        ena_com_admin_destroy(ena_dev);
2605
2606        ena_com_mmio_reg_read_request_destroy(ena_dev);
2607
2608        adapter->reset_reason = ENA_REGS_RESET_NORMAL;
2609        clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2610
2611        /* Finish with the destroy part. Start the init part */
2612
2613        rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
2614        if (rc) {
2615                dev_err(&pdev->dev, "Can not initialize device\n");
2616                goto err;
2617        }
2618        adapter->wd_state = wd_state;
2619
2620        rc = ena_device_validate_params(adapter, &get_feat_ctx);
2621        if (rc) {
2622                dev_err(&pdev->dev, "Validation of device parameters failed\n");
2623                goto err_device_destroy;
2624        }
2625
2626        rc = ena_enable_msix_and_set_admin_interrupts(adapter,
2627                                                      adapter->num_queues);
2628        if (rc) {
2629                dev_err(&pdev->dev, "Enable MSI-X failed\n");
2630                goto err_device_destroy;
2631        }
2632        /* If the interface was up before the reset bring it up */
2633        if (dev_up) {
2634                rc = ena_up(adapter);
2635                if (rc) {
2636                        dev_err(&pdev->dev, "Failed to create I/O queues\n");
2637                        goto err_disable_msix;
2638                }
2639        }
2640
2641        mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
2642
2643        rtnl_unlock();
2644
2645        dev_err(&pdev->dev, "Device reset completed successfully\n");
2646
2647        return;
2648err_disable_msix:
2649        ena_free_mgmnt_irq(adapter);
2650        ena_disable_msix(adapter);
2651err_device_destroy:
2652        ena_com_admin_destroy(ena_dev);
2653err:
2654        rtnl_unlock();
2655
2656        clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
2657
2658        dev_err(&pdev->dev,
2659                "Reset attempt failed. Can not reset the device\n");
2660}
2661
2662static int check_missing_comp_in_queue(struct ena_adapter *adapter,
2663                                       struct ena_ring *tx_ring)
2664{
2665        struct ena_tx_buffer *tx_buf;
2666        unsigned long last_jiffies;
2667        u32 missed_tx = 0;
2668        int i;
2669
2670        for (i = 0; i < tx_ring->ring_size; i++) {
2671                tx_buf = &tx_ring->tx_buffer_info[i];
2672                last_jiffies = tx_buf->last_jiffies;
2673                if (unlikely(last_jiffies &&
2674                             time_is_before_jiffies(last_jiffies + adapter->missing_tx_completion_to))) {
2675                        if (!tx_buf->print_once)
2676                                netif_notice(adapter, tx_err, adapter->netdev,
2677                                             "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
2678                                             tx_ring->qid, i);
2679
2680                        tx_buf->print_once = 1;
2681                        missed_tx++;
2682
2683                        if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
2684                                netif_err(adapter, tx_err, adapter->netdev,
2685                                          "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
2686                                          missed_tx,
2687                                          adapter->missing_tx_completion_threshold);
2688                                adapter->reset_reason =
2689                                        ENA_REGS_RESET_MISS_TX_CMPL;
2690                                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2691                                return -EIO;
2692                        }
2693                }
2694        }
2695
2696        return 0;
2697}
2698
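/* Scan up to ENA_MONITORED_TX_QUEUES TX rings per timer tick, resuming where
 * the previous scan left off, and schedule a device reset if too many TX
 * completions are overdue.
 */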
2699static void check_for_missing_tx_completions(struct ena_adapter *adapter)
2700{
2701        struct ena_ring *tx_ring;
2702        int i, budget, rc;
2703
2704        /* Make sure the driver isn't turning the device off in another context */
2705        smp_rmb();
2706
2707        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2708                return;
2709
2710        if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
2711                return;
2712
2713        if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
2714                return;
2715
2716        budget = ENA_MONITORED_TX_QUEUES;
2717
2718        for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
2719                tx_ring = &adapter->tx_ring[i];
2720
2721                rc = check_missing_comp_in_queue(adapter, tx_ring);
2722                if (unlikely(rc))
2723                        return;
2724
2725                budget--;
2726                if (!budget)
2727                        break;
2728        }
2729
2730        adapter->last_monitored_tx_qid = i % adapter->num_queues;
2731}
2732
2733/* trigger napi schedule after 2 consecutive detections */
2734#define EMPTY_RX_REFILL 2
2735/* For the rare case where the device runs out of Rx descriptors and the
2736 * napi handler failed to refill new Rx descriptors (due to a lack of memory
2737 * for example).
2738 * This case will lead to a deadlock:
2739 * The device won't send interrupts since all the new Rx packets will be dropped
2740 * The napi handler won't allocate new Rx descriptors so the device won't be
2741 * able to pass new Rx packets to the host.
2742 *
2743 * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
2744 * It is recommended to have at least 512MB, with a minimum of 128MB for a
2745 * constrained environment.
2746 *
2747 * When such a situation is detected - Reschedule napi
2748 */
2749static void check_for_empty_rx_ring(struct ena_adapter *adapter)
2750{
2751        struct ena_ring *rx_ring;
2752        int i, refill_required;
2753
2754        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2755                return;
2756
2757        if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
2758                return;
2759
2760        for (i = 0; i < adapter->num_queues; i++) {
2761                rx_ring = &adapter->rx_ring[i];
2762
2763                refill_required =
2764                        ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
2765                if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
2766                        rx_ring->empty_rx_queue++;
2767
2768                        if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
2769                                u64_stats_update_begin(&rx_ring->syncp);
2770                                rx_ring->rx_stats.empty_rx_ring++;
2771                                u64_stats_update_end(&rx_ring->syncp);
2772
2773                                netif_err(adapter, drv, adapter->netdev,
2774                                          "trigger refill for ring %d\n", i);
2775
2776                                napi_schedule(rx_ring->napi);
2777                                rx_ring->empty_rx_queue = 0;
2778                        }
2779                } else {
2780                        rx_ring->empty_rx_queue = 0;
2781                }
2782        }
2783}
2784
2785/* Check for keep alive expiration */
2786static void check_for_missing_keep_alive(struct ena_adapter *adapter)
2787{
2788        unsigned long keep_alive_expired;
2789
2790        if (!adapter->wd_state)
2791                return;
2792
2793        if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2794                return;
2795
2796        keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies +
2797                                           adapter->keep_alive_timeout);
2798        if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
2799                netif_err(adapter, drv, adapter->netdev,
2800                          "Keep alive watchdog timeout.\n");
2801                u64_stats_update_begin(&adapter->syncp);
2802                adapter->dev_stats.wd_expired++;
2803                u64_stats_update_end(&adapter->syncp);
2804                adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
2805                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2806        }
2807}
2808
2809static void check_for_admin_com_state(struct ena_adapter *adapter)
2810{
2811        if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
2812                netif_err(adapter, drv, adapter->netdev,
2813                          "ENA admin queue is not in running state!\n");
2814                u64_stats_update_begin(&adapter->syncp);
2815                adapter->dev_stats.admin_q_pause++;
2816                u64_stats_update_end(&adapter->syncp);
2817                adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
2818                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2819        }
2820}
2821
2822static void ena_update_hints(struct ena_adapter *adapter,
2823                             struct ena_admin_ena_hw_hints *hints)
2824{
2825        struct net_device *netdev = adapter->netdev;
2826
2827        if (hints->admin_completion_tx_timeout)
2828                adapter->ena_dev->admin_queue.completion_timeout =
2829                        hints->admin_completion_tx_timeout * 1000;
2830
2831        if (hints->mmio_read_timeout)
2832                /* convert to usec */
2833                adapter->ena_dev->mmio_read.reg_read_to =
2834                        hints->mmio_read_timeout * 1000;
2835
2836        if (hints->missed_tx_completion_count_threshold_to_reset)
2837                adapter->missing_tx_completion_threshold =
2838                        hints->missed_tx_completion_count_threshold_to_reset;
2839
2840        if (hints->missing_tx_completion_timeout) {
2841                if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2842                        adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
2843                else
2844                        adapter->missing_tx_completion_to =
2845                                msecs_to_jiffies(hints->missing_tx_completion_timeout);
2846        }
2847
2848        if (hints->netdev_wd_timeout)
2849                netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
2850
2851        if (hints->driver_watchdog_timeout) {
2852                if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2853                        adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
2854                else
2855                        adapter->keep_alive_timeout =
2856                                msecs_to_jiffies(hints->driver_watchdog_timeout);
2857        }
2858}
2859
2860static void ena_update_host_info(struct ena_admin_host_info *host_info,
2861                                 struct net_device *netdev)
2862{
2863        host_info->supported_network_features[0] =
2864                netdev->features & GENMASK_ULL(31, 0);
2865        host_info->supported_network_features[1] =
2866                (netdev->features & GENMASK_ULL(63, 32)) >> 32;
2867}
2868
2869static void ena_timer_service(unsigned long data)
2870{
2871        struct ena_adapter *adapter = (struct ena_adapter *)data;
2872        u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
2873        struct ena_admin_host_info *host_info =
2874                adapter->ena_dev->host_attr.host_info;
2875
2876        check_for_missing_keep_alive(adapter);
2877
2878        check_for_admin_com_state(adapter);
2879
2880        check_for_missing_tx_completions(adapter);
2881
2882        check_for_empty_rx_ring(adapter);
2883
2884        if (debug_area)
2885                ena_dump_stats_to_buf(adapter, debug_area);
2886
2887        if (host_info)
2888                ena_update_host_info(host_info, adapter->netdev);
2889
2890        if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2891                netif_err(adapter, drv, adapter->netdev,
2892                          "Trigger reset is on\n");
2893                ena_dump_stats_to_dmesg(adapter);
2894                queue_work(ena_wq, &adapter->reset_task);
2895                return;
2896        }
2897
2898        /* Reset the timer */
2899        mod_timer(&adapter->timer_service, jiffies + HZ);
2900}
2901
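/* The number of I/O queues is the minimum of: the online CPU count, the
 * device's SQ/CQ limits (or its LLQ limit when push mode is used) and the
 * available MSI-X vectors minus the one reserved for management.
 */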
2902static int ena_calc_io_queue_num(struct pci_dev *pdev,
2903                                 struct ena_com_dev *ena_dev,
2904                                 struct ena_com_dev_get_features_ctx *get_feat_ctx)
2905{
2906        int io_sq_num, io_queue_num;
2907
2908        /* In case of LLQ use the llq number in the get feature cmd */
2909        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2910                io_sq_num = get_feat_ctx->max_queues.max_llq_num;
2911
2912                if (io_sq_num == 0) {
2913                        dev_err(&pdev->dev,
2914                                "Trying to use LLQ but llq_num is 0. Falling back to regular queues\n");
2915
2916                        ena_dev->tx_mem_queue_type =
2917                                ENA_ADMIN_PLACEMENT_POLICY_HOST;
2918                        io_sq_num = get_feat_ctx->max_queues.max_sq_num;
2919                }
2920        } else {
2921                io_sq_num = get_feat_ctx->max_queues.max_sq_num;
2922        }
2923
2924        io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
2925        io_queue_num = min_t(int, io_queue_num, io_sq_num);
2926        io_queue_num = min_t(int, io_queue_num,
2927                             get_feat_ctx->max_queues.max_cq_num);
2928        /* 1 IRQ for mgmnt and 1 IRQ for each IO queue pair */
2929        io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1);
2930        if (unlikely(!io_queue_num)) {
2931                dev_err(&pdev->dev, "The device doesn't have io queues\n");
2932                return -EFAULT;
2933        }
2934
2935        return io_queue_num;
2936}
2937
2938static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
2939                              struct ena_com_dev_get_features_ctx *get_feat_ctx)
2940{
2941        bool has_mem_bar;
2942
2943        has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
2944
2945        /* Enable push mode if device supports LLQ */
2946        if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0))
2947                ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
2948        else
2949                ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2950}
2951
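/* Translate the device's reported offload capabilities into netdev feature
 * flags; SG, RXHASH and HIGHDMA are always advertised.
 */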
2952static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
2953                                 struct net_device *netdev)
2954{
2955        netdev_features_t dev_features = 0;
2956
2957        /* Set offload features */
2958        if (feat->offload.tx &
2959                ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
2960                dev_features |= NETIF_F_IP_CSUM;
2961
2962        if (feat->offload.tx &
2963                ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
2964                dev_features |= NETIF_F_IPV6_CSUM;
2965
2966        if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
2967                dev_features |= NETIF_F_TSO;
2968
2969        if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
2970                dev_features |= NETIF_F_TSO6;
2971
2972        if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
2973                dev_features |= NETIF_F_TSO_ECN;
2974
2975        if (feat->offload.rx_supported &
2976                ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
2977                dev_features |= NETIF_F_RXCSUM;
2978
2979        if (feat->offload.rx_supported &
2980                ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
2981                dev_features |= NETIF_F_RXCSUM;
2982
2983        netdev->features =
2984                dev_features |
2985                NETIF_F_SG |
2986                NETIF_F_RXHASH |
2987                NETIF_F_HIGHDMA;
2988
2989        netdev->hw_features |= netdev->features;
2990        netdev->vlan_features |= netdev->features;
2991}
2992
2993static void ena_set_conf_feat_params(struct ena_adapter *adapter,
2994                                     struct ena_com_dev_get_features_ctx *feat)
2995{
2996        struct net_device *netdev = adapter->netdev;
2997
2998        /* Copy mac address */
2999        if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
3000                eth_hw_addr_random(netdev);
3001                ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
3002        } else {
3003                ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
3004                ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
3005        }
3006
3007        /* Set offload features */
3008        ena_set_dev_offloads(feat, netdev);
3009
3010        adapter->max_mtu = feat->dev_attr.max_mtu;
3011        netdev->max_mtu = adapter->max_mtu;
3012        netdev->min_mtu = ENA_MIN_MTU;
3013}
3014
3015static int ena_rss_init_default(struct ena_adapter *adapter)
3016{
3017        struct ena_com_dev *ena_dev = adapter->ena_dev;
3018        struct device *dev = &adapter->pdev->dev;
3019        int rc, i;
3020        u32 val;
3021
3022        rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3023        if (unlikely(rc)) {
3024                dev_err(dev, "Cannot init indirect table\n");
3025                goto err_rss_init;
3026        }
3027
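            /* Fill the indirection table with the default ethtool layout,
             * spreading RX flows round-robin across the IO RX queues.
             */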
3028        for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3029                val = ethtool_rxfh_indir_default(i, adapter->num_queues);
3030                rc = ena_com_indirect_table_fill_entry(ena_dev, i,
3031                                                       ENA_IO_RXQ_IDX(val));
3032                if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3033                        dev_err(dev, "Cannot fill indirect table\n");
3034                        goto err_fill_indir;
3035                }
3036        }
3037
3038        rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
3039                                        ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
3040        if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3041                dev_err(dev, "Cannot fill hash function\n");
3042                goto err_fill_indir;
3043        }
3044
3045        rc = ena_com_set_default_hash_ctrl(ena_dev);
3046        if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3047                dev_err(dev, "Cannot fill hash control\n");
3048                goto err_fill_indir;
3049        }
3050
3051        return 0;
3052
3053err_fill_indir:
3054        ena_com_rss_destroy(ena_dev);
3055err_rss_init:
3056
3057        return rc;
3058}
3059
3060static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
3061{
3062        int release_bars;
3063
3064        if (ena_dev->mem_bar)
3065                devm_iounmap(&pdev->dev, ena_dev->mem_bar);
3066
3067        if (ena_dev->reg_bar)
3068                devm_iounmap(&pdev->dev, ena_dev->reg_bar);
3069
3070        release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3071        pci_release_selected_regions(pdev, release_bars);
3072}
3073
3074static int ena_calc_queue_size(struct pci_dev *pdev,
3075                               struct ena_com_dev *ena_dev,
3076                               u16 *max_tx_sgl_size,
3077                               u16 *max_rx_sgl_size,
3078                               struct ena_com_dev_get_features_ctx *get_feat_ctx)
3079{
3080        u32 queue_size = ENA_DEFAULT_RING_SIZE;
3081
3082        queue_size = min_t(u32, queue_size,
3083                           get_feat_ctx->max_queues.max_cq_depth);
3084        queue_size = min_t(u32, queue_size,
3085                           get_feat_ctx->max_queues.max_sq_depth);
3086
3087        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3088                queue_size = min_t(u32, queue_size,
3089                                   get_feat_ctx->max_queues.max_llq_depth);
3090
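            /* Round the queue size down to the nearest power of two, staying
             * within the device limits computed above.
             */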
3091        queue_size = rounddown_pow_of_two(queue_size);
3092
3093        if (unlikely(!queue_size)) {
3094                dev_err(&pdev->dev, "Invalid queue size\n");
3095                return -EFAULT;
3096        }
3097
3098        *max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
3099                                 get_feat_ctx->max_queues.max_packet_tx_descs);
3100        *max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
3101                                 get_feat_ctx->max_queues.max_packet_rx_descs);
3102
3103        return queue_size;
3104}
3105
3106/* ena_probe - Device Initialization Routine
3107 * @pdev: PCI device information struct
3108 * @ent: entry in ena_pci_tbl
3109 *
3110 * Returns 0 on success, negative on failure
3111 *
3112 * ena_probe initializes an adapter identified by a pci_dev structure.
3113 * It performs the OS initialization, configures the adapter private
3114 * structure, and issues a hardware reset.
3115 */
3116static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
3117{
3118        struct ena_com_dev_get_features_ctx get_feat_ctx;
3119        static int version_printed;
3120        struct net_device *netdev;
3121        struct ena_adapter *adapter;
3122        struct ena_com_dev *ena_dev = NULL;
3123        static int adapters_found;
3124        int io_queue_num, bars, rc;
3125        int queue_size;
3126        u16 tx_sgl_size = 0;
3127        u16 rx_sgl_size = 0;
3128        bool wd_state;
3129
3130        dev_dbg(&pdev->dev, "%s\n", __func__);
3131
3132        if (version_printed++ == 0)
3133                dev_info(&pdev->dev, "%s", version);
3134
3135        rc = pci_enable_device_mem(pdev);
3136        if (rc) {
3137                dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
3138                return rc;
3139        }
3140
3141        pci_set_master(pdev);
3142
3143        ena_dev = vzalloc(sizeof(*ena_dev));
3144        if (!ena_dev) {
3145                rc = -ENOMEM;
3146                goto err_disable_device;
3147        }
3148
3149        bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3150        rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
3151        if (rc) {
3152                dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
3153                        rc);
3154                goto err_free_ena_dev;
3155        }
3156
3157        ena_dev->reg_bar = devm_ioremap(&pdev->dev,
3158                                        pci_resource_start(pdev, ENA_REG_BAR),
3159                                        pci_resource_len(pdev, ENA_REG_BAR));
3160        if (!ena_dev->reg_bar) {
3161                dev_err(&pdev->dev, "failed to remap regs bar\n");
3162                rc = -EFAULT;
3163                goto err_free_region;
3164        }
3165
3166        ena_dev->dmadev = &pdev->dev;
3167
3168        rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
3169        if (rc) {
3170                dev_err(&pdev->dev, "ena device init failed\n");
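                    /* A timeout during device init usually means the device
                     * is not ready yet; convert it to -EPROBE_DEFER so the
                     * driver core retries the probe later.
                     */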
3171                if (rc == -ETIME)
3172                        rc = -EPROBE_DEFER;
3173                goto err_free_region;
3174        }
3175
3176        ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
3177
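            /* In device (LLQ) placement mode, map the LLQ memory BAR
             * write-combined; TX descriptors are pushed to the device
             * through this mapping.
             */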
3178        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
3179                ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3180                                                   pci_resource_start(pdev, ENA_MEM_BAR),
3181                                                   pci_resource_len(pdev, ENA_MEM_BAR));
3182                if (!ena_dev->mem_bar) {
3183                        rc = -EFAULT;
3184                        goto err_device_destroy;
3185                }
3186        }
3187
3188        /* Initial Tx interrupt delay. Assumes 1 usec granularity;
3189         * updated during device initialization with the real granularity.
3190         */
3191        ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
3192        io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx);
3193        queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size,
3194                                         &rx_sgl_size, &get_feat_ctx);
3195        if ((queue_size <= 0) || (io_queue_num <= 0)) {
3196                rc = -EFAULT;
3197                goto err_device_destroy;
3198        }
3199
3200        dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n",
3201                 io_queue_num, queue_size);
3202
3203        /* dev zeroed in alloc_etherdev_mq */
3204        netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
3205        if (!netdev) {
3206                dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
3207                rc = -ENOMEM;
3208                goto err_device_destroy;
3209        }
3210
3211        SET_NETDEV_DEV(netdev, &pdev->dev);
3212
3213        adapter = netdev_priv(netdev);
3214        pci_set_drvdata(pdev, adapter);
3215
3216        adapter->ena_dev = ena_dev;
3217        adapter->netdev = netdev;
3218        adapter->pdev = pdev;
3219
3220        ena_set_conf_feat_params(adapter, &get_feat_ctx);
3221
3222        adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
3223        adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3224
3225        adapter->tx_ring_size = queue_size;
3226        adapter->rx_ring_size = queue_size;
3227
3228        adapter->max_tx_sgl_size = tx_sgl_size;
3229        adapter->max_rx_sgl_size = rx_sgl_size;
3230
3231        adapter->num_queues = io_queue_num;
3232        adapter->last_monitored_tx_qid = 0;
3233
3234        adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
3235        adapter->wd_state = wd_state;
3236
3237        snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
3238
3239        rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
3240        if (rc) {
3241                dev_err(&pdev->dev,
3242                        "Failed to query interrupt moderation feature\n");
3243                goto err_netdev_destroy;
3244        }
3245        ena_init_io_rings(adapter);
3246
3247        netdev->netdev_ops = &ena_netdev_ops;
3248        netdev->watchdog_timeo = TX_TIMEOUT;
3249        ena_set_ethtool_ops(netdev);
3250
3251        netdev->priv_flags |= IFF_UNICAST_FLT;
3252
3253        u64_stats_init(&adapter->syncp);
3254
3255        rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
3256        if (rc) {
3257                dev_err(&pdev->dev,
3258                        "Failed to enable and set the admin interrupts\n");
3259                goto err_worker_destroy;
3260        }
3261        rc = ena_rss_init_default(adapter);
3262        if (rc && (rc != -EOPNOTSUPP)) {
3263                dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
3264                goto err_free_msix;
3265        }
3266
3267        ena_config_debug_area(adapter);
3268
3269        memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
3270
3271        netif_carrier_off(netdev);
3272
3273        rc = register_netdev(netdev);
3274        if (rc) {
3275                dev_err(&pdev->dev, "Cannot register net device\n");
3276                goto err_rss;
3277        }
3278
3279        INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend);
3280        INIT_WORK(&adapter->resume_io_task, ena_device_io_resume);
3281        INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
3282
3283        adapter->last_keep_alive_jiffies = jiffies;
3284        adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
3285        adapter->missing_tx_completion_to = TX_TIMEOUT;
3286        adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
3287
3288        ena_update_hints(adapter, &get_feat_ctx.hw_hints);
3289
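            /* Arm the periodic service timer; the first run is roughly one
             * second from now, rounded to reduce unnecessary wakeups.
             */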
3290        setup_timer(&adapter->timer_service, ena_timer_service,
3291                    (unsigned long)adapter);
3292        mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3293
3294        dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM, Queues %d\n",
3295                 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
3296                 netdev->dev_addr, io_queue_num);
3297
3298        set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3299
3300        adapters_found++;
3301
3302        return 0;
3303
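    /* Error unwind: release resources in the reverse order of their
     * acquisition above.
     */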
3304err_rss:
3305        ena_com_delete_debug_area(ena_dev);
3306        ena_com_rss_destroy(ena_dev);
3307err_free_msix:
3308        ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
3309        ena_free_mgmnt_irq(adapter);
3310        ena_disable_msix(adapter);
3311err_worker_destroy:
3312        ena_com_destroy_interrupt_moderation(ena_dev);
3313        del_timer(&adapter->timer_service);
3314        cancel_work_sync(&adapter->suspend_io_task);
3315        cancel_work_sync(&adapter->resume_io_task);
3316err_netdev_destroy:
3317        free_netdev(netdev);
3318err_device_destroy:
3319        ena_com_delete_host_info(ena_dev);
3320        ena_com_admin_destroy(ena_dev);
3321err_free_region:
3322        ena_release_bars(ena_dev, pdev);
3323err_free_ena_dev:
3324        vfree(ena_dev);
3325err_disable_device:
3326        pci_disable_device(pdev);
3327        return rc;
3328}
3329
3330/*****************************************************************************/
3331static int ena_sriov_configure(struct pci_dev *dev, int numvfs)
3332{
3333        int rc;
3334
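            /* numvfs > 0 enables that many VFs, numvfs == 0 disables SR-IOV,
             * and any other (negative) value is rejected.
             */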
3335        if (numvfs > 0) {
3336                rc = pci_enable_sriov(dev, numvfs);
3337                if (rc != 0) {
3338                        dev_err(&dev->dev,
3339                                "pci_enable_sriov failed to enable %d VFs, error: %d\n",
3340                                numvfs, rc);
3341                        return rc;
3342                }
3343
3344                return numvfs;
3345        }
3346
3347        if (numvfs == 0) {
3348                pci_disable_sriov(dev);
3349                return 0;
3350        }
3351
3352        return -EINVAL;
3353}
3354
3355/*****************************************************************************/
3356/*****************************************************************************/
3357
3358/* ena_remove - Device Removal Routine
3359 * @pdev: PCI device information struct
3360 *
3361 * ena_remove is called by the PCI subsystem to alert the driver
3362 * that it should release a PCI device.
3363 */
3364static void ena_remove(struct pci_dev *pdev)
3365{
3366        struct ena_adapter *adapter = pci_get_drvdata(pdev);
3367        struct ena_com_dev *ena_dev;
3368        struct net_device *netdev;
3369
3370        ena_dev = adapter->ena_dev;
3371        netdev = adapter->netdev;
3372
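            /* Free the aRFS CPU affinity reverse map, if one was allocated,
             * before tearing down the rest of the device.
             */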
3373#ifdef CONFIG_RFS_ACCEL
3374        if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
3375                free_irq_cpu_rmap(netdev->rx_cpu_rmap);
3376                netdev->rx_cpu_rmap = NULL;
3377        }
3378#endif /* CONFIG_RFS_ACCEL */
3379
3380        unregister_netdev(netdev);
3381        del_timer_sync(&adapter->timer_service);
3382
3383        cancel_work_sync(&adapter->reset_task);
3384
3385        cancel_work_sync(&adapter->suspend_io_task);
3386
3387        cancel_work_sync(&adapter->resume_io_task);
3388
3389        /* Reset the device only if the device is running. */
3390        if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
3391                ena_com_dev_reset(ena_dev, adapter->reset_reason);
3392
3393        ena_free_mgmnt_irq(adapter);
3394
3395        ena_disable_msix(adapter);
3396
3397        free_netdev(netdev);
3398
3399        ena_com_mmio_reg_read_request_destroy(ena_dev);
3400
3401        ena_com_abort_admin_commands(ena_dev);
3402
3403        ena_com_wait_for_abort_completion(ena_dev);
3404
3405        ena_com_admin_destroy(ena_dev);
3406
3407        ena_com_rss_destroy(ena_dev);
3408
3409        ena_com_delete_debug_area(ena_dev);
3410
3411        ena_com_delete_host_info(ena_dev);
3412
3413        ena_release_bars(ena_dev, pdev);
3414
3415        pci_disable_device(pdev);
3416
3417        ena_com_destroy_interrupt_moderation(ena_dev);
3418
3419        vfree(ena_dev);
3420}
3421
3422static struct pci_driver ena_pci_driver = {
3423        .name           = DRV_MODULE_NAME,
3424        .id_table       = ena_pci_tbl,
3425        .probe          = ena_probe,
3426        .remove         = ena_remove,
3427        .sriov_configure = ena_sriov_configure,
3428};
3429
3430static int __init ena_init(void)
3431{
3432        pr_info("%s", version);
3433
3434        ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
3435        if (!ena_wq) {
3436                pr_err("Failed to create workqueue\n");
3437                return -ENOMEM;
3438        }
3439
3440        return pci_register_driver(&ena_pci_driver);
3441}
3442
3443static void __exit ena_cleanup(void)
3444{
3445        pci_unregister_driver(&ena_pci_driver);
3446
3447        if (ena_wq) {
3448                destroy_workqueue(ena_wq);
3449                ena_wq = NULL;
3450        }
3451}
3452
3453/******************************************************************************
3454 ******************************** AENQ Handlers *******************************
3455 *****************************************************************************/
3456/* ena_update_on_link_change:
3457 * Notify the network interface about the change in link status
3458 */
3459static void ena_update_on_link_change(void *adapter_data,
3460                                      struct ena_admin_aenq_entry *aenq_e)
3461{
3462        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3463        struct ena_admin_aenq_link_change_desc *aenq_desc =
3464                (struct ena_admin_aenq_link_change_desc *)aenq_e;
3465        int status = aenq_desc->flags &
3466                ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3467
3468        if (status) {
3469                netdev_dbg(adapter->netdev, "%s\n", __func__);
3470                set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
3471                netif_carrier_on(adapter->netdev);
3472        } else {
3473                clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
3474                netif_carrier_off(adapter->netdev);
3475        }
3476}
3477
3478static void ena_keep_alive_wd(void *adapter_data,
3479                              struct ena_admin_aenq_entry *aenq_e)
3480{
3481        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3482        struct ena_admin_aenq_keep_alive_desc *desc;
3483        u64 rx_drops;
3484
3485        desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
3486        adapter->last_keep_alive_jiffies = jiffies;
3487
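            /* The device reports the drop counter as two 32-bit halves;
             * combine them into a single 64-bit value.
             */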
3488        rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
3489
3490        u64_stats_update_begin(&adapter->syncp);
3491        adapter->dev_stats.rx_drops = rx_drops;
3492        u64_stats_update_end(&adapter->syncp);
3493}
3494
3495static void ena_notification(void *adapter_data,
3496                             struct ena_admin_aenq_entry *aenq_e)
3497{
3498        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3499        struct ena_admin_ena_hw_hints *hints;
3500
3501        WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
3502             "Invalid group(%x) expected %x\n",
3503             aenq_e->aenq_common_desc.group,
3504             ENA_ADMIN_NOTIFICATION);
3505
3506        switch (aenq_e->aenq_common_desc.syndrom) {
3507        case ENA_ADMIN_SUSPEND:
3508                /* Suspend just the IO queues.
3509                 * We deliberately don't suspend admin so the timer and
3510                 * the keep_alive events should remain.
3511                 */
3512                queue_work(ena_wq, &adapter->suspend_io_task);
3513                break;
3514        case ENA_ADMIN_RESUME:
3515                queue_work(ena_wq, &adapter->resume_io_task);
3516                break;
3517        case ENA_ADMIN_UPDATE_HINTS:
3518                hints = (struct ena_admin_ena_hw_hints *)
3519                        (&aenq_e->inline_data_w4);
3520                ena_update_hints(adapter, hints);
3521                break;
3522        default:
3523                netif_err(adapter, drv, adapter->netdev,
3524                          "Invalid aenq notification syndrome %d\n",
3525                          aenq_e->aenq_common_desc.syndrom);
3526        }
3527}
3528
3529/* This handler will be called for an unknown event group or unimplemented handlers */
3530static void unimplemented_aenq_handler(void *data,
3531                                       struct ena_admin_aenq_entry *aenq_e)
3532{
3533        struct ena_adapter *adapter = (struct ena_adapter *)data;
3534
3535        netif_err(adapter, drv, adapter->netdev,
3536                  "Unknown event received, or the event has no implemented handler\n");
3537}
3538
3539static struct ena_aenq_handlers aenq_handlers = {
3540        .handlers = {
3541                [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
3542                [ENA_ADMIN_NOTIFICATION] = ena_notification,
3543                [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
3544        },
3545        .unimplemented_handler = unimplemented_aenq_handler
3546};
3547
3548module_init(ena_init);
3549module_exit(ena_cleanup);
3550