linux/drivers/net/ethernet/amazon/ena/ena_netdev.c
   1/*
   2 * Copyright 2015 Amazon.com, Inc. or its affiliates.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  34
  35#ifdef CONFIG_RFS_ACCEL
  36#include <linux/cpu_rmap.h>
  37#endif /* CONFIG_RFS_ACCEL */
  38#include <linux/ethtool.h>
  39#include <linux/if_vlan.h>
  40#include <linux/kernel.h>
  41#include <linux/module.h>
  42#include <linux/numa.h>
  43#include <linux/pci.h>
  44#include <linux/utsname.h>
  45#include <linux/version.h>
  46#include <linux/vmalloc.h>
  47#include <net/ip.h>
  48
  49#include "ena_netdev.h"
  50#include "ena_pci_id_tbl.h"
  51
  52static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n";
  53
  54MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
  55MODULE_DESCRIPTION(DEVICE_NAME);
  56MODULE_LICENSE("GPL");
  57MODULE_VERSION(DRV_MODULE_VERSION);
  58
  59/* Time in jiffies before concluding the transmitter is hung. */
  60#define TX_TIMEOUT  (5 * HZ)
  61
  62#define ENA_NAPI_BUDGET 64
  63
  64#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
  65                NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
  66static int debug = -1;
  67module_param(debug, int, 0);
  68MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
  69
  70static struct ena_aenq_handlers aenq_handlers;
  71
  72static struct workqueue_struct *ena_wq;
  73
  74MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
  75
  76static int ena_rss_init_default(struct ena_adapter *adapter);
  77static void check_for_admin_com_state(struct ena_adapter *adapter);
  78static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
  79static int ena_restore_device(struct ena_adapter *adapter);
  80
  81static void ena_tx_timeout(struct net_device *dev)
  82{
  83        struct ena_adapter *adapter = netdev_priv(dev);
  84
   85        /* Change the state of the device to trigger reset.
   86         * Check that a reset is not already in progress or pending.
   87         */
  88
  89        if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
  90                return;
  91
  92        adapter->reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
  93        u64_stats_update_begin(&adapter->syncp);
  94        adapter->dev_stats.tx_timeout++;
  95        u64_stats_update_end(&adapter->syncp);
  96
  97        netif_err(adapter, tx_err, dev, "Transmit time out\n");
  98}
  99
 100static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
 101{
 102        int i;
 103
 104        for (i = 0; i < adapter->num_queues; i++)
 105                adapter->rx_ring[i].mtu = mtu;
 106}
 107
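/* ena_change_mtu - set a new MTU for the interface
 * @dev: network interface device structure
 * @new_mtu: new MTU value
 *
 * The new MTU is first applied on the device via ena_com_set_dev_mtu();
 * only if the device accepts it are the Rx rings and dev->mtu updated.
 *
 * Return 0 on success, negative on failure
 */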
 108static int ena_change_mtu(struct net_device *dev, int new_mtu)
 109{
 110        struct ena_adapter *adapter = netdev_priv(dev);
 111        int ret;
 112
 113        ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
 114        if (!ret) {
 115                netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
 116                update_rx_ring_mtu(adapter, new_mtu);
 117                dev->mtu = new_mtu;
 118        } else {
 119                netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
 120                          new_mtu);
 121        }
 122
 123        return ret;
 124}
 125
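/* ena_init_rx_cpu_rmap - set up the CPU reverse map used by accelerated RFS
 * @adapter: board private structure
 *
 * Allocate an IRQ CPU rmap sized to the number of I/O queues and register
 * each I/O queue's MSI-X vector in it, so aRFS can steer flows to the CPU
 * that services the queue's interrupt. Does nothing and returns 0 when
 * CONFIG_RFS_ACCEL is not set.
 */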
 126static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
 127{
 128#ifdef CONFIG_RFS_ACCEL
 129        u32 i;
 130        int rc;
 131
 132        adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues);
 133        if (!adapter->netdev->rx_cpu_rmap)
 134                return -ENOMEM;
 135        for (i = 0; i < adapter->num_queues; i++) {
 136                int irq_idx = ENA_IO_IRQ_IDX(i);
 137
 138                rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
 139                                      pci_irq_vector(adapter->pdev, irq_idx));
 140                if (rc) {
 141                        free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
 142                        adapter->netdev->rx_cpu_rmap = NULL;
 143                        return rc;
 144                }
 145        }
 146#endif /* CONFIG_RFS_ACCEL */
 147        return 0;
 148}
 149
 150static void ena_init_io_rings_common(struct ena_adapter *adapter,
 151                                     struct ena_ring *ring, u16 qid)
 152{
 153        ring->qid = qid;
 154        ring->pdev = adapter->pdev;
 155        ring->dev = &adapter->pdev->dev;
 156        ring->netdev = adapter->netdev;
 157        ring->napi = &adapter->ena_napi[qid].napi;
 158        ring->adapter = adapter;
 159        ring->ena_dev = adapter->ena_dev;
 160        ring->per_napi_packets = 0;
 161        ring->per_napi_bytes = 0;
 162        ring->cpu = 0;
 163        ring->first_interrupt = false;
 164        ring->no_interrupt_event_cnt = 0;
 165        u64_stats_init(&ring->syncp);
 166}
 167
 168static void ena_init_io_rings(struct ena_adapter *adapter)
 169{
 170        struct ena_com_dev *ena_dev;
 171        struct ena_ring *txr, *rxr;
 172        int i;
 173
 174        ena_dev = adapter->ena_dev;
 175
 176        for (i = 0; i < adapter->num_queues; i++) {
 177                txr = &adapter->tx_ring[i];
 178                rxr = &adapter->rx_ring[i];
 179
 180                /* TX/RX common ring state */
 181                ena_init_io_rings_common(adapter, txr, i);
 182                ena_init_io_rings_common(adapter, rxr, i);
 183
 184                /* TX specific ring state */
 185                txr->ring_size = adapter->requested_tx_ring_size;
 186                txr->tx_max_header_size = ena_dev->tx_max_header_size;
 187                txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
 188                txr->sgl_size = adapter->max_tx_sgl_size;
 189                txr->smoothed_interval =
 190                        ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
 191
 192                /* RX specific ring state */
 193                rxr->ring_size = adapter->requested_rx_ring_size;
 194                rxr->rx_copybreak = adapter->rx_copybreak;
 195                rxr->sgl_size = adapter->max_rx_sgl_size;
 196                rxr->smoothed_interval =
 197                        ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
 198                rxr->empty_rx_queue = 0;
 199        }
 200}
 201
 202/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
 203 * @adapter: network interface device structure
 204 * @qid: queue index
 205 *
 206 * Return 0 on success, negative on failure
 207 */
 208static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
 209{
 210        struct ena_ring *tx_ring = &adapter->tx_ring[qid];
 211        struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
 212        int size, i, node;
 213
 214        if (tx_ring->tx_buffer_info) {
 215                netif_err(adapter, ifup,
  216                          adapter->netdev, "tx_buffer_info is not NULL");
 217                return -EEXIST;
 218        }
 219
 220        size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
 221        node = cpu_to_node(ena_irq->cpu);
 222
 223        tx_ring->tx_buffer_info = vzalloc_node(size, node);
 224        if (!tx_ring->tx_buffer_info) {
 225                tx_ring->tx_buffer_info = vzalloc(size);
 226                if (!tx_ring->tx_buffer_info)
 227                        goto err_tx_buffer_info;
 228        }
 229
 230        size = sizeof(u16) * tx_ring->ring_size;
 231        tx_ring->free_ids = vzalloc_node(size, node);
 232        if (!tx_ring->free_ids) {
 233                tx_ring->free_ids = vzalloc(size);
 234                if (!tx_ring->free_ids)
 235                        goto err_tx_free_ids;
 236        }
 237
 238        size = tx_ring->tx_max_header_size;
 239        tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
 240        if (!tx_ring->push_buf_intermediate_buf) {
 241                tx_ring->push_buf_intermediate_buf = vzalloc(size);
 242                if (!tx_ring->push_buf_intermediate_buf)
 243                        goto err_push_buf_intermediate_buf;
 244        }
 245
 246        /* Req id ring for TX out of order completions */
 247        for (i = 0; i < tx_ring->ring_size; i++)
 248                tx_ring->free_ids[i] = i;
 249
 250        /* Reset tx statistics */
 251        memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
 252
 253        tx_ring->next_to_use = 0;
 254        tx_ring->next_to_clean = 0;
 255        tx_ring->cpu = ena_irq->cpu;
 256        return 0;
 257
 258err_push_buf_intermediate_buf:
 259        vfree(tx_ring->free_ids);
 260        tx_ring->free_ids = NULL;
 261err_tx_free_ids:
 262        vfree(tx_ring->tx_buffer_info);
 263        tx_ring->tx_buffer_info = NULL;
 264err_tx_buffer_info:
 265        return -ENOMEM;
 266}
 267
 268/* ena_free_tx_resources - Free I/O Tx Resources per Queue
 269 * @adapter: network interface device structure
 270 * @qid: queue index
 271 *
 272 * Free all transmit software resources
 273 */
 274static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
 275{
 276        struct ena_ring *tx_ring = &adapter->tx_ring[qid];
 277
 278        vfree(tx_ring->tx_buffer_info);
 279        tx_ring->tx_buffer_info = NULL;
 280
 281        vfree(tx_ring->free_ids);
 282        tx_ring->free_ids = NULL;
 283
 284        vfree(tx_ring->push_buf_intermediate_buf);
 285        tx_ring->push_buf_intermediate_buf = NULL;
 286}
 287
  288/* ena_setup_all_tx_resources - allocate I/O Tx resources for all queues
 289 * @adapter: private structure
 290 *
 291 * Return 0 on success, negative on failure
 292 */
 293static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
 294{
 295        int i, rc = 0;
 296
 297        for (i = 0; i < adapter->num_queues; i++) {
 298                rc = ena_setup_tx_resources(adapter, i);
 299                if (rc)
 300                        goto err_setup_tx;
 301        }
 302
 303        return 0;
 304
 305err_setup_tx:
 306
 307        netif_err(adapter, ifup, adapter->netdev,
 308                  "Tx queue %d: allocation failed\n", i);
 309
 310        /* rewind the index freeing the rings as we go */
 311        while (i--)
 312                ena_free_tx_resources(adapter, i);
 313        return rc;
 314}
 315
 316/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
 317 * @adapter: board private structure
 318 *
 319 * Free all transmit software resources
 320 */
 321static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
 322{
 323        int i;
 324
 325        for (i = 0; i < adapter->num_queues; i++)
 326                ena_free_tx_resources(adapter, i);
 327}
 328
 329static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
 330{
 331        if (likely(req_id < rx_ring->ring_size))
 332                return 0;
 333
 334        netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
 335                  "Invalid rx req_id: %hu\n", req_id);
 336
 337        u64_stats_update_begin(&rx_ring->syncp);
 338        rx_ring->rx_stats.bad_req_id++;
 339        u64_stats_update_end(&rx_ring->syncp);
 340
 341        /* Trigger device reset */
 342        rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
 343        set_bit(ENA_FLAG_TRIGGER_RESET, &rx_ring->adapter->flags);
 344        return -EFAULT;
 345}
 346
 347/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
 348 * @adapter: network interface device structure
 349 * @qid: queue index
 350 *
 351 * Returns 0 on success, negative on failure
 352 */
 353static int ena_setup_rx_resources(struct ena_adapter *adapter,
 354                                  u32 qid)
 355{
 356        struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 357        struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
 358        int size, node, i;
 359
 360        if (rx_ring->rx_buffer_info) {
 361                netif_err(adapter, ifup, adapter->netdev,
 362                          "rx_buffer_info is not NULL");
 363                return -EEXIST;
 364        }
 365
 366        /* alloc extra element so in rx path
 367         * we can always prefetch rx_info + 1
 368         */
 369        size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
 370        node = cpu_to_node(ena_irq->cpu);
 371
 372        rx_ring->rx_buffer_info = vzalloc_node(size, node);
 373        if (!rx_ring->rx_buffer_info) {
 374                rx_ring->rx_buffer_info = vzalloc(size);
 375                if (!rx_ring->rx_buffer_info)
 376                        return -ENOMEM;
 377        }
 378
 379        size = sizeof(u16) * rx_ring->ring_size;
 380        rx_ring->free_ids = vzalloc_node(size, node);
 381        if (!rx_ring->free_ids) {
 382                rx_ring->free_ids = vzalloc(size);
 383                if (!rx_ring->free_ids) {
 384                        vfree(rx_ring->rx_buffer_info);
 385                        rx_ring->rx_buffer_info = NULL;
 386                        return -ENOMEM;
 387                }
 388        }
 389
 390        /* Req id ring for receiving RX pkts out of order */
 391        for (i = 0; i < rx_ring->ring_size; i++)
 392                rx_ring->free_ids[i] = i;
 393
 394        /* Reset rx statistics */
 395        memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
 396
 397        rx_ring->next_to_clean = 0;
 398        rx_ring->next_to_use = 0;
 399        rx_ring->cpu = ena_irq->cpu;
 400
 401        return 0;
 402}
 403
 404/* ena_free_rx_resources - Free I/O Rx Resources
 405 * @adapter: network interface device structure
 406 * @qid: queue index
 407 *
 408 * Free all receive software resources
 409 */
 410static void ena_free_rx_resources(struct ena_adapter *adapter,
 411                                  u32 qid)
 412{
 413        struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 414
 415        vfree(rx_ring->rx_buffer_info);
 416        rx_ring->rx_buffer_info = NULL;
 417
 418        vfree(rx_ring->free_ids);
 419        rx_ring->free_ids = NULL;
 420}
 421
  422/* ena_setup_all_rx_resources - allocate I/O Rx resources for all queues
 423 * @adapter: board private structure
 424 *
 425 * Return 0 on success, negative on failure
 426 */
 427static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
 428{
 429        int i, rc = 0;
 430
 431        for (i = 0; i < adapter->num_queues; i++) {
 432                rc = ena_setup_rx_resources(adapter, i);
 433                if (rc)
 434                        goto err_setup_rx;
 435        }
 436
 437        return 0;
 438
 439err_setup_rx:
 440
 441        netif_err(adapter, ifup, adapter->netdev,
 442                  "Rx queue %d: allocation failed\n", i);
 443
 444        /* rewind the index freeing the rings as we go */
 445        while (i--)
 446                ena_free_rx_resources(adapter, i);
 447        return rc;
 448}
 449
 450/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
 451 * @adapter: board private structure
 452 *
 453 * Free all receive software resources
 454 */
 455static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
 456{
 457        int i;
 458
 459        for (i = 0; i < adapter->num_queues; i++)
 460                ena_free_rx_resources(adapter, i);
 461}
 462
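/* ena_alloc_rx_page - allocate and DMA-map a page for an Rx buffer
 * @rx_ring: Rx ring the buffer belongs to
 * @rx_info: Rx buffer descriptor to fill
 * @gfp: page allocation flags
 *
 * If @rx_info already holds a page it is reused and nothing is allocated.
 * Otherwise a page is allocated, mapped for DMA from the device and its
 * address and length are recorded in the ena_com buffer.
 */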
 463static int ena_alloc_rx_page(struct ena_ring *rx_ring,
 464                                    struct ena_rx_buffer *rx_info, gfp_t gfp)
 465{
 466        struct ena_com_buf *ena_buf;
 467        struct page *page;
 468        dma_addr_t dma;
 469
  470        /* if the previously allocated page was not used yet, reuse it */
 471        if (unlikely(rx_info->page))
 472                return 0;
 473
 474        page = alloc_page(gfp);
 475        if (unlikely(!page)) {
 476                u64_stats_update_begin(&rx_ring->syncp);
 477                rx_ring->rx_stats.page_alloc_fail++;
 478                u64_stats_update_end(&rx_ring->syncp);
 479                return -ENOMEM;
 480        }
 481
 482        dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
 483                           DMA_FROM_DEVICE);
 484        if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
 485                u64_stats_update_begin(&rx_ring->syncp);
 486                rx_ring->rx_stats.dma_mapping_err++;
 487                u64_stats_update_end(&rx_ring->syncp);
 488
 489                __free_page(page);
 490                return -EIO;
 491        }
 492        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 493                  "alloc page %p, rx_info %p\n", page, rx_info);
 494
 495        rx_info->page = page;
 496        rx_info->page_offset = 0;
 497        ena_buf = &rx_info->ena_buf;
 498        ena_buf->paddr = dma;
 499        ena_buf->len = ENA_PAGE_SIZE;
 500
 501        return 0;
 502}
 503
 504static void ena_free_rx_page(struct ena_ring *rx_ring,
 505                             struct ena_rx_buffer *rx_info)
 506{
 507        struct page *page = rx_info->page;
 508        struct ena_com_buf *ena_buf = &rx_info->ena_buf;
 509
 510        if (unlikely(!page)) {
 511                netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
 512                           "Trying to free unallocated buffer\n");
 513                return;
 514        }
 515
 516        dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE,
 517                       DMA_FROM_DEVICE);
 518
 519        __free_page(page);
 520        rx_info->page = NULL;
 521}
 522
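/* ena_refill_rx_bufs - allocate and post Rx buffers
 * @rx_ring: Rx ring to refill
 * @num: number of buffers to add
 *
 * For each free req_id allocate a page and post a single Rx descriptor to
 * the submission queue. Stops early on an allocation or post failure and
 * rings the doorbell for whatever was actually posted.
 *
 * Returns the number of buffers that were added.
 */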
 523static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
 524{
 525        u16 next_to_use, req_id;
 526        u32 i;
 527        int rc;
 528
 529        next_to_use = rx_ring->next_to_use;
 530
 531        for (i = 0; i < num; i++) {
 532                struct ena_rx_buffer *rx_info;
 533
 534                req_id = rx_ring->free_ids[next_to_use];
 535                rc = validate_rx_req_id(rx_ring, req_id);
 536                if (unlikely(rc < 0))
 537                        break;
 538
 539                rx_info = &rx_ring->rx_buffer_info[req_id];
 540
 541
 542                rc = ena_alloc_rx_page(rx_ring, rx_info,
 543                                       GFP_ATOMIC | __GFP_COMP);
 544                if (unlikely(rc < 0)) {
 545                        netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
 546                                   "failed to alloc buffer for rx queue %d\n",
 547                                   rx_ring->qid);
 548                        break;
 549                }
 550                rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
 551                                                &rx_info->ena_buf,
 552                                                req_id);
 553                if (unlikely(rc)) {
 554                        netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
 555                                   "failed to add buffer for rx queue %d\n",
 556                                   rx_ring->qid);
 557                        break;
 558                }
 559                next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
 560                                                   rx_ring->ring_size);
 561        }
 562
 563        if (unlikely(i < num)) {
 564                u64_stats_update_begin(&rx_ring->syncp);
 565                rx_ring->rx_stats.refil_partial++;
 566                u64_stats_update_end(&rx_ring->syncp);
 567                netdev_warn(rx_ring->netdev,
 568                            "refilled rx qid %d with only %d buffers (from %d)\n",
 569                            rx_ring->qid, i, num);
 570        }
 571
 572        /* ena_com_write_sq_doorbell issues a wmb() */
 573        if (likely(i))
 574                ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
 575
 576        rx_ring->next_to_use = next_to_use;
 577
 578        return i;
 579}
 580
 581static void ena_free_rx_bufs(struct ena_adapter *adapter,
 582                             u32 qid)
 583{
 584        struct ena_ring *rx_ring = &adapter->rx_ring[qid];
 585        u32 i;
 586
 587        for (i = 0; i < rx_ring->ring_size; i++) {
 588                struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
 589
 590                if (rx_info->page)
 591                        ena_free_rx_page(rx_ring, rx_info);
 592        }
 593}
 594
  595/* ena_refill_all_rx_bufs - allocate Rx buffers for all the queues
 596 * @adapter: board private structure
 597 */
 598static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
 599{
 600        struct ena_ring *rx_ring;
 601        int i, rc, bufs_num;
 602
 603        for (i = 0; i < adapter->num_queues; i++) {
 604                rx_ring = &adapter->rx_ring[i];
 605                bufs_num = rx_ring->ring_size - 1;
 606                rc = ena_refill_rx_bufs(rx_ring, bufs_num);
 607
 608                if (unlikely(rc != bufs_num))
 609                        netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
 610                                   "refilling Queue %d failed. allocated %d buffers from: %d\n",
 611                                   i, rc, bufs_num);
 612        }
 613}
 614
 615static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
 616{
 617        int i;
 618
 619        for (i = 0; i < adapter->num_queues; i++)
 620                ena_free_rx_bufs(adapter, i);
 621}
 622
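/* ena_unmap_tx_skb - DMA-unmap the buffers of a transmitted skb
 * @tx_ring: Tx ring the skb was posted on
 * @tx_info: bookkeeping entry holding the skb's mapped buffers
 *
 * Unmap the linear part (if it was mapped) and then every mapped frag
 * page referenced by @tx_info.
 */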
 623static void ena_unmap_tx_skb(struct ena_ring *tx_ring,
 624                                    struct ena_tx_buffer *tx_info)
 625{
 626        struct ena_com_buf *ena_buf;
 627        u32 cnt;
 628        int i;
 629
 630        ena_buf = tx_info->bufs;
 631        cnt = tx_info->num_of_bufs;
 632
 633        if (unlikely(!cnt))
 634                return;
 635
 636        if (tx_info->map_linear_data) {
 637                dma_unmap_single(tx_ring->dev,
 638                                 dma_unmap_addr(ena_buf, paddr),
 639                                 dma_unmap_len(ena_buf, len),
 640                                 DMA_TO_DEVICE);
 641                ena_buf++;
 642                cnt--;
 643        }
 644
 645        /* unmap remaining mapped pages */
 646        for (i = 0; i < cnt; i++) {
 647                dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
 648                               dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
 649                ena_buf++;
 650        }
 651}
 652
 653/* ena_free_tx_bufs - Free Tx Buffers per Queue
  654 * @tx_ring: TX ring whose buffers are to be freed
 655 */
 656static void ena_free_tx_bufs(struct ena_ring *tx_ring)
 657{
 658        bool print_once = true;
 659        u32 i;
 660
 661        for (i = 0; i < tx_ring->ring_size; i++) {
 662                struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
 663
 664                if (!tx_info->skb)
 665                        continue;
 666
 667                if (print_once) {
 668                        netdev_notice(tx_ring->netdev,
 669                                      "free uncompleted tx skb qid %d idx 0x%x\n",
 670                                      tx_ring->qid, i);
 671                        print_once = false;
 672                } else {
 673                        netdev_dbg(tx_ring->netdev,
 674                                   "free uncompleted tx skb qid %d idx 0x%x\n",
 675                                   tx_ring->qid, i);
 676                }
 677
 678                ena_unmap_tx_skb(tx_ring, tx_info);
 679
 680                dev_kfree_skb_any(tx_info->skb);
 681        }
 682        netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
 683                                                  tx_ring->qid));
 684}
 685
 686static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
 687{
 688        struct ena_ring *tx_ring;
 689        int i;
 690
 691        for (i = 0; i < adapter->num_queues; i++) {
 692                tx_ring = &adapter->tx_ring[i];
 693                ena_free_tx_bufs(tx_ring);
 694        }
 695}
 696
 697static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
 698{
 699        u16 ena_qid;
 700        int i;
 701
 702        for (i = 0; i < adapter->num_queues; i++) {
 703                ena_qid = ENA_IO_TXQ_IDX(i);
 704                ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
 705        }
 706}
 707
 708static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
 709{
 710        u16 ena_qid;
 711        int i;
 712
 713        for (i = 0; i < adapter->num_queues; i++) {
 714                ena_qid = ENA_IO_RXQ_IDX(i);
 715                ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
 716        }
 717}
 718
 719static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
 720{
 721        ena_destroy_all_tx_queues(adapter);
 722        ena_destroy_all_rx_queues(adapter);
 723}
 724
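/* validate_tx_req_id - sanity check a Tx completion req_id
 * @tx_ring: Tx ring the completion belongs to
 * @req_id: request id reported by the device
 *
 * A req_id outside the ring, or one whose buffer has no skb attached, is
 * treated as a device error: the bad_req_id counter is incremented and a
 * device reset is triggered. Returns 0 if valid, -EFAULT otherwise.
 */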
 725static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
 726{
 727        struct ena_tx_buffer *tx_info = NULL;
 728
 729        if (likely(req_id < tx_ring->ring_size)) {
 730                tx_info = &tx_ring->tx_buffer_info[req_id];
 731                if (likely(tx_info->skb))
 732                        return 0;
 733        }
 734
 735        if (tx_info)
 736                netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
 737                          "tx_info doesn't have valid skb\n");
 738        else
 739                netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
 740                          "Invalid req_id: %hu\n", req_id);
 741
 742        u64_stats_update_begin(&tx_ring->syncp);
 743        tx_ring->tx_stats.bad_req_id++;
 744        u64_stats_update_end(&tx_ring->syncp);
 745
 746        /* Trigger device reset */
 747        tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
 748        set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags);
 749        return -EFAULT;
 750}
 751
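/* ena_clean_tx_irq - reclaim completed Tx descriptors
 * @tx_ring: Tx ring to clean
 * @budget: maximum number of completed packets to process
 *
 * Pull completion req_ids from the Tx completion queue, unmap and free
 * the corresponding skbs and return the req_ids to the free list. The
 * netdev Tx queue is woken up if enough descriptors became available.
 *
 * Returns the number of completed packets that were processed.
 */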
 752static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
 753{
 754        struct netdev_queue *txq;
 755        bool above_thresh;
 756        u32 tx_bytes = 0;
 757        u32 total_done = 0;
 758        u16 next_to_clean;
 759        u16 req_id;
 760        int tx_pkts = 0;
 761        int rc;
 762
 763        next_to_clean = tx_ring->next_to_clean;
 764        txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
 765
 766        while (tx_pkts < budget) {
 767                struct ena_tx_buffer *tx_info;
 768                struct sk_buff *skb;
 769
 770                rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
 771                                                &req_id);
 772                if (rc)
 773                        break;
 774
 775                rc = validate_tx_req_id(tx_ring, req_id);
 776                if (rc)
 777                        break;
 778
 779                tx_info = &tx_ring->tx_buffer_info[req_id];
 780                skb = tx_info->skb;
 781
  782                /* prefetch skb_end_pointer() to speed up skb_shinfo(skb) */
 783                prefetch(&skb->end);
 784
 785                tx_info->skb = NULL;
 786                tx_info->last_jiffies = 0;
 787
 788                ena_unmap_tx_skb(tx_ring, tx_info);
 789
 790                netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
 791                          "tx_poll: q %d skb %p completed\n", tx_ring->qid,
 792                          skb);
 793
 794                tx_bytes += skb->len;
 795                dev_kfree_skb(skb);
 796                tx_pkts++;
 797                total_done += tx_info->tx_descs;
 798
 799                tx_ring->free_ids[next_to_clean] = req_id;
 800                next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
 801                                                     tx_ring->ring_size);
 802        }
 803
 804        tx_ring->next_to_clean = next_to_clean;
 805        ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
 806        ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
 807
 808        netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
 809
 810        netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
 811                  "tx_poll: q %d done. total pkts: %d\n",
 812                  tx_ring->qid, tx_pkts);
 813
  814        /* need to make the ring's circular-buffer update visible to
 815         * ena_start_xmit() before checking for netif_queue_stopped().
 816         */
 817        smp_mb();
 818
 819        above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
 820                                                    ENA_TX_WAKEUP_THRESH);
 821        if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
 822                __netif_tx_lock(txq, smp_processor_id());
 823                above_thresh =
 824                        ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
 825                                                     ENA_TX_WAKEUP_THRESH);
 826                if (netif_tx_queue_stopped(txq) && above_thresh) {
 827                        netif_tx_wake_queue(txq);
 828                        u64_stats_update_begin(&tx_ring->syncp);
 829                        tx_ring->tx_stats.queue_wakeup++;
 830                        u64_stats_update_end(&tx_ring->syncp);
 831                }
 832                __netif_tx_unlock(txq);
 833        }
 834
 835        tx_ring->per_napi_bytes += tx_bytes;
 836        tx_ring->per_napi_packets += tx_pkts;
 837
 838        return tx_pkts;
 839}
 840
 841static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags)
 842{
 843        struct sk_buff *skb;
 844
 845        if (frags)
 846                skb = napi_get_frags(rx_ring->napi);
 847        else
 848                skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
 849                                                rx_ring->rx_copybreak);
 850
 851        if (unlikely(!skb)) {
 852                u64_stats_update_begin(&rx_ring->syncp);
 853                rx_ring->rx_stats.skb_alloc_fail++;
 854                u64_stats_update_end(&rx_ring->syncp);
 855                netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
 856                          "Failed to allocate skb. frags: %d\n", frags);
 857                return NULL;
 858        }
 859
 860        return skb;
 861}
 862
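/* ena_rx_skb - build an skb out of the received descriptors
 * @rx_ring: Rx ring the packet arrived on
 * @ena_bufs: array of buffers that make up the packet
 * @descs: number of descriptors (buffers) used by the packet
 * @next_to_clean: ring index to advance as buffers are consumed
 *
 * Packets not longer than rx_copybreak are copied into a small linear skb
 * so the original page stays mapped and can be reused; larger packets get
 * their pages attached as frags to an skb from napi_get_frags().
 */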
 863static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 864                                  struct ena_com_rx_buf_info *ena_bufs,
 865                                  u32 descs,
 866                                  u16 *next_to_clean)
 867{
 868        struct sk_buff *skb;
 869        struct ena_rx_buffer *rx_info;
 870        u16 len, req_id, buf = 0;
 871        void *va;
 872
 873        len = ena_bufs[buf].len;
 874        req_id = ena_bufs[buf].req_id;
 875        rx_info = &rx_ring->rx_buffer_info[req_id];
 876
 877        if (unlikely(!rx_info->page)) {
 878                netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
 879                          "Page is NULL\n");
 880                return NULL;
 881        }
 882
 883        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 884                  "rx_info %p page %p\n",
 885                  rx_info, rx_info->page);
 886
 887        /* save virt address of first buffer */
 888        va = page_address(rx_info->page) + rx_info->page_offset;
 889        prefetch(va + NET_IP_ALIGN);
 890
 891        if (len <= rx_ring->rx_copybreak) {
 892                skb = ena_alloc_skb(rx_ring, false);
 893                if (unlikely(!skb))
 894                        return NULL;
 895
 896                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 897                          "rx allocated small packet. len %d. data_len %d\n",
 898                          skb->len, skb->data_len);
 899
 900                /* sync this buffer for CPU use */
 901                dma_sync_single_for_cpu(rx_ring->dev,
 902                                        dma_unmap_addr(&rx_info->ena_buf, paddr),
 903                                        len,
 904                                        DMA_FROM_DEVICE);
 905                skb_copy_to_linear_data(skb, va, len);
 906                dma_sync_single_for_device(rx_ring->dev,
 907                                           dma_unmap_addr(&rx_info->ena_buf, paddr),
 908                                           len,
 909                                           DMA_FROM_DEVICE);
 910
 911                skb_put(skb, len);
 912                skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 913                rx_ring->free_ids[*next_to_clean] = req_id;
 914                *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
 915                                                     rx_ring->ring_size);
 916                return skb;
 917        }
 918
 919        skb = ena_alloc_skb(rx_ring, true);
 920        if (unlikely(!skb))
 921                return NULL;
 922
 923        do {
 924                dma_unmap_page(rx_ring->dev,
 925                               dma_unmap_addr(&rx_info->ena_buf, paddr),
 926                               ENA_PAGE_SIZE, DMA_FROM_DEVICE);
 927
 928                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
 929                                rx_info->page_offset, len, ENA_PAGE_SIZE);
 930
 931                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 932                          "rx skb updated. len %d. data_len %d\n",
 933                          skb->len, skb->data_len);
 934
 935                rx_info->page = NULL;
 936
 937                rx_ring->free_ids[*next_to_clean] = req_id;
 938                *next_to_clean =
 939                        ENA_RX_RING_IDX_NEXT(*next_to_clean,
 940                                             rx_ring->ring_size);
 941                if (likely(--descs == 0))
 942                        break;
 943
 944                buf++;
 945                len = ena_bufs[buf].len;
 946                req_id = ena_bufs[buf].req_id;
 947                rx_info = &rx_ring->rx_buffer_info[req_id];
 948        } while (1);
 949
 950        return skb;
 951}
 952
 953/* ena_rx_checksum - indicate in skb if hw indicated a good cksum
  954 * @rx_ring: ring the packet was received on
 955 * @ena_rx_ctx: received packet context/metadata
 956 * @skb: skb currently being received and modified
 957 */
 958static void ena_rx_checksum(struct ena_ring *rx_ring,
 959                                   struct ena_com_rx_ctx *ena_rx_ctx,
 960                                   struct sk_buff *skb)
 961{
 962        /* Rx csum disabled */
 963        if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
 964                skb->ip_summed = CHECKSUM_NONE;
 965                return;
 966        }
 967
 968        /* For fragmented packets the checksum isn't valid */
 969        if (ena_rx_ctx->frag) {
 970                skb->ip_summed = CHECKSUM_NONE;
 971                return;
 972        }
 973
  974        /* if IPv4 and the header checksum failed */
 975        if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
 976                     (ena_rx_ctx->l3_csum_err))) {
 977                /* ipv4 checksum error */
 978                skb->ip_summed = CHECKSUM_NONE;
 979                u64_stats_update_begin(&rx_ring->syncp);
 980                rx_ring->rx_stats.bad_csum++;
 981                u64_stats_update_end(&rx_ring->syncp);
 982                netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
 983                          "RX IPv4 header checksum error\n");
 984                return;
 985        }
 986
 987        /* if TCP/UDP */
 988        if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
 989                   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
 990                if (unlikely(ena_rx_ctx->l4_csum_err)) {
 991                        /* TCP/UDP checksum error */
 992                        u64_stats_update_begin(&rx_ring->syncp);
 993                        rx_ring->rx_stats.bad_csum++;
 994                        u64_stats_update_end(&rx_ring->syncp);
 995                        netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
 996                                  "RX L4 checksum error\n");
 997                        skb->ip_summed = CHECKSUM_NONE;
 998                        return;
 999                }
1000
1001                if (likely(ena_rx_ctx->l4_csum_checked)) {
1002                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1003                        u64_stats_update_begin(&rx_ring->syncp);
1004                        rx_ring->rx_stats.csum_good++;
1005                        u64_stats_update_end(&rx_ring->syncp);
1006                } else {
1007                        u64_stats_update_begin(&rx_ring->syncp);
1008                        rx_ring->rx_stats.csum_unchecked++;
1009                        u64_stats_update_end(&rx_ring->syncp);
1010                        skb->ip_summed = CHECKSUM_NONE;
1011                }
1012        } else {
1013                skb->ip_summed = CHECKSUM_NONE;
1014                return;
1015        }
1016
1017}
1018
1019static void ena_set_rx_hash(struct ena_ring *rx_ring,
1020                            struct ena_com_rx_ctx *ena_rx_ctx,
1021                            struct sk_buff *skb)
1022{
1023        enum pkt_hash_types hash_type;
1024
1025        if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
1026                if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1027                           (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
1028
1029                        hash_type = PKT_HASH_TYPE_L4;
1030                else
1031                        hash_type = PKT_HASH_TYPE_NONE;
1032
1033                /* Override hash type if the packet is fragmented */
1034                if (ena_rx_ctx->frag)
1035                        hash_type = PKT_HASH_TYPE_NONE;
1036
1037                skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
1038        }
1039}
1040
1041/* ena_clean_rx_irq - Cleanup RX irq
1042 * @rx_ring: RX ring to clean
1043 * @napi: napi handler
1044 * @budget: how many packets driver is allowed to clean
1045 *
 1046 * Returns the number of received packets that were processed.
1047 */
1048static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
1049                            u32 budget)
1050{
1051        u16 next_to_clean = rx_ring->next_to_clean;
1052        u32 res_budget, work_done;
1053
1054        struct ena_com_rx_ctx ena_rx_ctx;
1055        struct ena_adapter *adapter;
1056        struct sk_buff *skb;
1057        int refill_required;
1058        int refill_threshold;
1059        int rc = 0;
1060        int total_len = 0;
1061        int rx_copybreak_pkt = 0;
1062        int i;
1063
1064        netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1065                  "%s qid %d\n", __func__, rx_ring->qid);
1066        res_budget = budget;
1067
1068        do {
1069                ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1070                ena_rx_ctx.max_bufs = rx_ring->sgl_size;
1071                ena_rx_ctx.descs = 0;
1072                rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
1073                                    rx_ring->ena_com_io_sq,
1074                                    &ena_rx_ctx);
1075                if (unlikely(rc))
1076                        goto error;
1077
1078                if (unlikely(ena_rx_ctx.descs == 0))
1079                        break;
1080
1081                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1082                          "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1083                          rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1084                          ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1085
1086                /* allocate skb and fill it */
1087                skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs,
1088                                 &next_to_clean);
1089
1090                /* exit if we failed to retrieve a buffer */
1091                if (unlikely(!skb)) {
1092                        for (i = 0; i < ena_rx_ctx.descs; i++) {
1093                                rx_ring->free_ids[next_to_clean] =
1094                                        rx_ring->ena_bufs[i].req_id;
1095                                next_to_clean =
1096                                        ENA_RX_RING_IDX_NEXT(next_to_clean,
1097                                                             rx_ring->ring_size);
1098                        }
1099                        break;
1100                }
1101
1102                ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
1103
1104                ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
1105
1106                skb_record_rx_queue(skb, rx_ring->qid);
1107
1108                if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) {
1109                        total_len += rx_ring->ena_bufs[0].len;
1110                        rx_copybreak_pkt++;
1111                        napi_gro_receive(napi, skb);
1112                } else {
1113                        total_len += skb->len;
1114                        napi_gro_frags(napi);
1115                }
1116
1117                res_budget--;
1118        } while (likely(res_budget));
1119
1120        work_done = budget - res_budget;
1121        rx_ring->per_napi_bytes += total_len;
1122        rx_ring->per_napi_packets += work_done;
1123        u64_stats_update_begin(&rx_ring->syncp);
1124        rx_ring->rx_stats.bytes += total_len;
1125        rx_ring->rx_stats.cnt += work_done;
1126        rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
1127        u64_stats_update_end(&rx_ring->syncp);
1128
1129        rx_ring->next_to_clean = next_to_clean;
1130
1131        refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq);
1132        refill_threshold =
1133                min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
1134                      ENA_RX_REFILL_THRESH_PACKET);
1135
1136        /* Optimization, try to batch new rx buffers */
1137        if (refill_required > refill_threshold) {
1138                ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1139                ena_refill_rx_bufs(rx_ring, refill_required);
1140        }
1141
1142        return work_done;
1143
1144error:
1145        adapter = netdev_priv(rx_ring->netdev);
1146
1147        u64_stats_update_begin(&rx_ring->syncp);
1148        rx_ring->rx_stats.bad_desc_num++;
1149        u64_stats_update_end(&rx_ring->syncp);
1150
1151        /* Too many desc from the device. Trigger reset */
1152        adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
1153        set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
1154
1155        return 0;
1156}
1157
1158void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
1159                                       struct ena_ring *tx_ring)
1160{
1161        /* We apply adaptive moderation on Rx path only.
1162         * Tx uses static interrupt moderation.
1163         */
1164        ena_com_calculate_interrupt_delay(rx_ring->ena_dev,
1165                                          rx_ring->per_napi_packets,
1166                                          rx_ring->per_napi_bytes,
1167                                          &rx_ring->smoothed_interval,
1168                                          &rx_ring->moder_tbl_idx);
1169
1170        /* Reset per napi packets/bytes */
1171        tx_ring->per_napi_packets = 0;
1172        tx_ring->per_napi_bytes = 0;
1173        rx_ring->per_napi_packets = 0;
1174        rx_ring->per_napi_bytes = 0;
1175}
1176
1177static void ena_unmask_interrupt(struct ena_ring *tx_ring,
1178                                        struct ena_ring *rx_ring)
1179{
1180        struct ena_eth_io_intr_reg intr_reg;
1181
1182        /* Update intr register: rx intr delay,
1183         * tx intr delay and interrupt unmask
1184         */
1185        ena_com_update_intr_reg(&intr_reg,
1186                                rx_ring->smoothed_interval,
1187                                tx_ring->smoothed_interval,
1188                                true);
1189
 1190        /* The Tx and Rx rings share a single MSI-X vector and both
 1191         * CQs hold a pointer to its interrupt register, so either
 1192         * of them can be used to unmask the interrupt
1193         */
1194        ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
1195}
1196
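/* ena_update_ring_numa_node - follow the CPU the queue pair runs on
 * @tx_ring: Tx ring of the pair
 * @rx_ring: Rx ring sharing the same interrupt and CPU
 *
 * If NAPI now runs on a different CPU than the one recorded in the rings,
 * update the NUMA node hint of both completion queues and remember the
 * new CPU so subsequent polls can skip the update.
 */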
1197static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1198                                             struct ena_ring *rx_ring)
1199{
1200        int cpu = get_cpu();
1201        int numa_node;
1202
1203        /* Check only one ring since the 2 rings are running on the same cpu */
1204        if (likely(tx_ring->cpu == cpu))
1205                goto out;
1206
1207        numa_node = cpu_to_node(cpu);
1208        put_cpu();
1209
1210        if (numa_node != NUMA_NO_NODE) {
1211                ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1212                ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node);
1213        }
1214
1215        tx_ring->cpu = cpu;
1216        rx_ring->cpu = cpu;
1217
1218        return;
1219out:
1220        put_cpu();
1221}
1222
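/* ena_io_poll - NAPI poll handler for a Tx/Rx queue pair
 * @napi: napi context
 * @budget: Rx packet budget granted by the stack
 *
 * Clean the Tx ring (with its own derived budget) and then the Rx ring.
 * If the device is down or a reset was triggered, complete immediately.
 * When both rings finish under budget, complete the NAPI context and,
 * unless it is being busy-polled, adjust interrupt moderation and unmask
 * the shared interrupt; otherwise return the full budget so NAPI polls
 * again.
 */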
1223static int ena_io_poll(struct napi_struct *napi, int budget)
1224{
1225        struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
1226        struct ena_ring *tx_ring, *rx_ring;
1227
1228        u32 tx_work_done;
1229        u32 rx_work_done;
1230        int tx_budget;
1231        int napi_comp_call = 0;
1232        int ret;
1233
1234        tx_ring = ena_napi->tx_ring;
1235        rx_ring = ena_napi->rx_ring;
1236
1237        tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
1238
1239        if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1240            test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
1241                napi_complete_done(napi, 0);
1242                return 0;
1243        }
1244
1245        tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
1246        rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
1247
 1248        /* If the device is about to reset or is down, avoid unmasking
1249         * the interrupt and return 0 so NAPI won't reschedule
1250         */
1251        if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1252                     test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
1253                napi_complete_done(napi, 0);
1254                ret = 0;
1255
1256        } else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
1257                napi_comp_call = 1;
1258
 1259                /* Update NUMA and unmask the interrupt only when scheduled
 1260                 * from the interrupt context (as opposed to sk_busy_loop)
1261                 */
1262                if (napi_complete_done(napi, rx_work_done)) {
1263                        /* Tx and Rx share the same interrupt vector */
1264                        if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
1265                                ena_adjust_intr_moderation(rx_ring, tx_ring);
1266
1267                        ena_unmask_interrupt(tx_ring, rx_ring);
1268                }
1269
1270                ena_update_ring_numa_node(tx_ring, rx_ring);
1271
1272                ret = rx_work_done;
1273        } else {
1274                ret = budget;
1275        }
1276
1277        u64_stats_update_begin(&tx_ring->syncp);
1278        tx_ring->tx_stats.napi_comp += napi_comp_call;
1279        tx_ring->tx_stats.tx_poll++;
1280        u64_stats_update_end(&tx_ring->syncp);
1281
1282        return ret;
1283}
1284
1285static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
1286{
1287        struct ena_adapter *adapter = (struct ena_adapter *)data;
1288
1289        ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1290
1291        /* Don't call the aenq handler before probe is done */
1292        if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
1293                ena_com_aenq_intr_handler(adapter->ena_dev, data);
1294
1295        return IRQ_HANDLED;
1296}
1297
1298/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
1299 * @irq: interrupt number
 1300 * @data: pointer to the queue's ena_napi structure
1301 */
1302static irqreturn_t ena_intr_msix_io(int irq, void *data)
1303{
1304        struct ena_napi *ena_napi = data;
1305
1306        ena_napi->tx_ring->first_interrupt = true;
1307        ena_napi->rx_ring->first_interrupt = true;
1308
1309        napi_schedule_irqoff(&ena_napi->napi);
1310
1311        return IRQ_HANDLED;
1312}
1313
 1314/* Reserve a single MSI-X vector for management (admin + AENQ),
 1315 * plus one vector for each potential I/O queue.
 1316 * The number of potential I/O queues is the minimum of what the device
 1317 * supports and the number of vCPUs.
1318 */
1319static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
1320{
1321        int msix_vecs, irq_cnt;
1322
1323        if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1324                netif_err(adapter, probe, adapter->netdev,
1325                          "Error, MSI-X is already enabled\n");
1326                return -EPERM;
1327        }
1328
 1329        /* Reserve the max MSI-X vectors we might need */
1330        msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
1331        netif_dbg(adapter, probe, adapter->netdev,
1332                  "trying to enable MSI-X, vectors %d\n", msix_vecs);
1333
1334        irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
1335                                        msix_vecs, PCI_IRQ_MSIX);
1336
1337        if (irq_cnt < 0) {
1338                netif_err(adapter, probe, adapter->netdev,
1339                          "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
1340                return -ENOSPC;
1341        }
1342
1343        if (irq_cnt != msix_vecs) {
1344                netif_notice(adapter, probe, adapter->netdev,
1345                             "enable only %d MSI-X (out of %d), reduce the number of queues\n",
1346                             irq_cnt, msix_vecs);
1347                adapter->num_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
1348        }
1349
1350        if (ena_init_rx_cpu_rmap(adapter))
1351                netif_warn(adapter, probe, adapter->netdev,
1352                           "Failed to map IRQs to CPUs\n");
1353
1354        adapter->msix_vecs = irq_cnt;
1355        set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
1356
1357        return 0;
1358}
1359
1360static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1361{
1362        u32 cpu;
1363
1364        snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1365                 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1366                 pci_name(adapter->pdev));
1367        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
1368                ena_intr_msix_mgmnt;
1369        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1370        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1371                pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
1372        cpu = cpumask_first(cpu_online_mask);
1373        adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
1374        cpumask_set_cpu(cpu,
1375                        &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
1376}
1377
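/* ena_setup_io_intr - populate the irq table entries for the I/O vectors
 * @adapter: board private structure
 *
 * Fill in the name, handler, data, vector and CPU of each I/O queue's
 * MSI-X entry, spreading the queues round-robin across the online CPUs
 * and recording the matching affinity hint mask.
 */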
1378static void ena_setup_io_intr(struct ena_adapter *adapter)
1379{
1380        struct net_device *netdev;
1381        int irq_idx, i, cpu;
1382
1383        netdev = adapter->netdev;
1384
1385        for (i = 0; i < adapter->num_queues; i++) {
1386                irq_idx = ENA_IO_IRQ_IDX(i);
1387                cpu = i % num_online_cpus();
1388
1389                snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1390                         "%s-Tx-Rx-%d", netdev->name, i);
1391                adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
1392                adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
1393                adapter->irq_tbl[irq_idx].vector =
1394                        pci_irq_vector(adapter->pdev, irq_idx);
1395                adapter->irq_tbl[irq_idx].cpu = cpu;
1396
1397                cpumask_set_cpu(cpu,
1398                                &adapter->irq_tbl[irq_idx].affinity_hint_mask);
1399        }
1400}
1401
1402static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
1403{
1404        unsigned long flags = 0;
1405        struct ena_irq *irq;
1406        int rc;
1407
1408        irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1409        rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1410                         irq->data);
1411        if (rc) {
1412                netif_err(adapter, probe, adapter->netdev,
1413                          "failed to request admin irq\n");
1414                return rc;
1415        }
1416
1417        netif_dbg(adapter, probe, adapter->netdev,
 1418                  "set affinity hint of mgmnt irq to 0x%lx (irq vector: %d)\n",
1419                  irq->affinity_hint_mask.bits[0], irq->vector);
1420
1421        irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1422
1423        return rc;
1424}
1425
1426static int ena_request_io_irq(struct ena_adapter *adapter)
1427{
1428        unsigned long flags = 0;
1429        struct ena_irq *irq;
1430        int rc = 0, i, k;
1431
1432        if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1433                netif_err(adapter, ifup, adapter->netdev,
1434                          "Failed to request I/O IRQ: MSI-X is not enabled\n");
1435                return -EINVAL;
1436        }
1437
1438        for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1439                irq = &adapter->irq_tbl[i];
1440                rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1441                                 irq->data);
1442                if (rc) {
1443                        netif_err(adapter, ifup, adapter->netdev,
1444                                  "Failed to request I/O IRQ. index %d rc %d\n",
1445                                   i, rc);
1446                        goto err;
1447                }
1448
1449                netif_dbg(adapter, ifup, adapter->netdev,
1450                          "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
1451                          i, irq->affinity_hint_mask.bits[0], irq->vector);
1452
1453                irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1454        }
1455
1456        return rc;
1457
1458err:
1459        for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
1460                irq = &adapter->irq_tbl[k];
1461                free_irq(irq->vector, irq->data);
1462        }
1463
1464        return rc;
1465}
1466
1467static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
1468{
1469        struct ena_irq *irq;
1470
1471        irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1472        synchronize_irq(irq->vector);
1473        irq_set_affinity_hint(irq->vector, NULL);
1474        free_irq(irq->vector, irq->data);
1475}
1476
1477static void ena_free_io_irq(struct ena_adapter *adapter)
1478{
1479        struct ena_irq *irq;
1480        int i;
1481
1482#ifdef CONFIG_RFS_ACCEL
1483        if (adapter->msix_vecs >= 1) {
1484                free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
1485                adapter->netdev->rx_cpu_rmap = NULL;
1486        }
1487#endif /* CONFIG_RFS_ACCEL */
1488
1489        for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1490                irq = &adapter->irq_tbl[i];
1491                irq_set_affinity_hint(irq->vector, NULL);
1492                free_irq(irq->vector, irq->data);
1493        }
1494}
1495
1496static void ena_disable_msix(struct ena_adapter *adapter)
1497{
1498        if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
1499                pci_free_irq_vectors(adapter->pdev);
1500}
1501
1502static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
1503{
1504        int i;
1505
1506        if (!netif_running(adapter->netdev))
1507                return;
1508
1509        for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++)
1510                synchronize_irq(adapter->irq_tbl[i].vector);
1511}
1512
1513static void ena_del_napi(struct ena_adapter *adapter)
1514{
1515        int i;
1516
1517        for (i = 0; i < adapter->num_queues; i++)
1518                netif_napi_del(&adapter->ena_napi[i].napi);
1519}
1520
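    /* Register one NAPI context per I/O queue and link it to its Tx/Rx rings */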
1521static void ena_init_napi(struct ena_adapter *adapter)
1522{
1523        struct ena_napi *napi;
1524        int i;
1525
1526        for (i = 0; i < adapter->num_queues; i++) {
1527                napi = &adapter->ena_napi[i];
1528
1529                netif_napi_add(adapter->netdev,
1530                               &adapter->ena_napi[i].napi,
1531                               ena_io_poll,
1532                               ENA_NAPI_BUDGET);
1533                napi->rx_ring = &adapter->rx_ring[i];
1534                napi->tx_ring = &adapter->tx_ring[i];
1535                napi->qid = i;
1536        }
1537}
1538
1539static void ena_napi_disable_all(struct ena_adapter *adapter)
1540{
1541        int i;
1542
1543        for (i = 0; i < adapter->num_queues; i++)
1544                napi_disable(&adapter->ena_napi[i].napi);
1545}
1546
1547static void ena_napi_enable_all(struct ena_adapter *adapter)
1548{
1549        int i;
1550
1551        for (i = 0; i < adapter->num_queues; i++)
1552                napi_enable(&adapter->ena_napi[i].napi);
1553}
1554
1555static void ena_restore_ethtool_params(struct ena_adapter *adapter)
1556{
1557        adapter->tx_usecs = 0;
1558        adapter->rx_usecs = 0;
1559        adapter->tx_frames = 1;
1560        adapter->rx_frames = 1;
1561}
1562
1563/* Configure RSS: the indirection table, hash function and hash inputs */
1564static int ena_rss_configure(struct ena_adapter *adapter)
1565{
1566        struct ena_com_dev *ena_dev = adapter->ena_dev;
1567        int rc;
1568
1569        /* In case the RSS table wasn't initialized by probe */
1570        if (!ena_dev->rss.tbl_log_size) {
1571                rc = ena_rss_init_default(adapter);
1572                if (rc && (rc != -EOPNOTSUPP)) {
1573                        netif_err(adapter, ifup, adapter->netdev,
1574                                  "Failed to init RSS rc: %d\n", rc);
1575                        return rc;
1576                }
1577        }
1578
1579        /* Set indirect table */
1580        rc = ena_com_indirect_table_set(ena_dev);
1581        if (unlikely(rc && rc != -EOPNOTSUPP))
1582                return rc;
1583
1584        /* Configure hash function (if supported) */
1585        rc = ena_com_set_hash_function(ena_dev);
1586        if (unlikely(rc && (rc != -EOPNOTSUPP)))
1587                return rc;
1588
1589        /* Configure hash inputs (if supported) */
1590        rc = ena_com_set_hash_ctrl(ena_dev);
1591        if (unlikely(rc && (rc != -EOPNOTSUPP)))
1592                return rc;
1593
1594        return 0;
1595}
1596
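    /* Final stage of bringing the interface up: configure RSS, apply the MTU,
     * refill the Rx buffers, start the Tx queues and enable NAPI.
     */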
1597static int ena_up_complete(struct ena_adapter *adapter)
1598{
1599        int rc;
1600
1601        rc = ena_rss_configure(adapter);
1602        if (rc)
1603                return rc;
1604
1605        ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
1606
1607        ena_refill_all_rx_bufs(adapter);
1608
1609        /* enable transmits */
1610        netif_tx_start_all_queues(adapter->netdev);
1611
1612        ena_restore_ethtool_params(adapter);
1613
1614        ena_napi_enable_all(adapter);
1615
1616        return 0;
1617}
1618
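    /* Create a single Tx I/O queue in the device and retrieve the SQ/CQ
     * handlers for the corresponding driver ring.
     */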
1619static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
1620{
1621        struct ena_com_create_io_ctx ctx;
1622        struct ena_com_dev *ena_dev;
1623        struct ena_ring *tx_ring;
1624        u32 msix_vector;
1625        u16 ena_qid;
1626        int rc;
1627
1628        ena_dev = adapter->ena_dev;
1629
1630        tx_ring = &adapter->tx_ring[qid];
1631        msix_vector = ENA_IO_IRQ_IDX(qid);
1632        ena_qid = ENA_IO_TXQ_IDX(qid);
1633
1634        memset(&ctx, 0x0, sizeof(ctx));
1635
1636        ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1637        ctx.qid = ena_qid;
1638        ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1639        ctx.msix_vector = msix_vector;
1640        ctx.queue_size = tx_ring->ring_size;
1641        ctx.numa_node = cpu_to_node(tx_ring->cpu);
1642
1643        rc = ena_com_create_io_queue(ena_dev, &ctx);
1644        if (rc) {
1645                netif_err(adapter, ifup, adapter->netdev,
1646                          "Failed to create I/O TX queue num %d rc: %d\n",
1647                          qid, rc);
1648                return rc;
1649        }
1650
1651        rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1652                                     &tx_ring->ena_com_io_sq,
1653                                     &tx_ring->ena_com_io_cq);
1654        if (rc) {
1655                netif_err(adapter, ifup, adapter->netdev,
1656                          "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
1657                          qid, rc);
1658                ena_com_destroy_io_queue(ena_dev, ena_qid);
1659                return rc;
1660        }
1661
1662        ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
1663        return rc;
1664}
1665
1666static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
1667{
1668        struct ena_com_dev *ena_dev = adapter->ena_dev;
1669        int rc, i;
1670
1671        for (i = 0; i < adapter->num_queues; i++) {
1672                rc = ena_create_io_tx_queue(adapter, i);
1673                if (rc)
1674                        goto create_err;
1675        }
1676
1677        return 0;
1678
1679create_err:
1680        while (i--)
1681                ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1682
1683        return rc;
1684}
1685
1686static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
1687{
1688        struct ena_com_dev *ena_dev;
1689        struct ena_com_create_io_ctx ctx;
1690        struct ena_ring *rx_ring;
1691        u32 msix_vector;
1692        u16 ena_qid;
1693        int rc;
1694
1695        ena_dev = adapter->ena_dev;
1696
1697        rx_ring = &adapter->rx_ring[qid];
1698        msix_vector = ENA_IO_IRQ_IDX(qid);
1699        ena_qid = ENA_IO_RXQ_IDX(qid);
1700
1701        memset(&ctx, 0x0, sizeof(ctx));
1702
1703        ctx.qid = ena_qid;
1704        ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1705        ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1706        ctx.msix_vector = msix_vector;
1707        ctx.queue_size = rx_ring->ring_size;
1708        ctx.numa_node = cpu_to_node(rx_ring->cpu);
1709
1710        rc = ena_com_create_io_queue(ena_dev, &ctx);
1711        if (rc) {
1712                netif_err(adapter, ifup, adapter->netdev,
1713                          "Failed to create I/O RX queue num %d rc: %d\n",
1714                          qid, rc);
1715                return rc;
1716        }
1717
1718        rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1719                                     &rx_ring->ena_com_io_sq,
1720                                     &rx_ring->ena_com_io_cq);
1721        if (rc) {
1722                netif_err(adapter, ifup, adapter->netdev,
1723                          "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
1724                          qid, rc);
1725                ena_com_destroy_io_queue(ena_dev, ena_qid);
1726                return rc;
1727        }
1728
1729        ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
1730
1731        return rc;
1732}
1733
1734static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
1735{
1736        struct ena_com_dev *ena_dev = adapter->ena_dev;
1737        int rc, i;
1738
1739        for (i = 0; i < adapter->num_queues; i++) {
1740                rc = ena_create_io_rx_queue(adapter, i);
1741                if (rc)
1742                        goto create_err;
1743        }
1744
1745        return 0;
1746
1747create_err:
1748        while (i--)
1749                ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1750
1751        return rc;
1752}
1753
1754static void set_io_rings_size(struct ena_adapter *adapter,
1755                                     int new_tx_size, int new_rx_size)
1756{
1757        int i;
1758
1759        for (i = 0; i < adapter->num_queues; i++) {
1760                adapter->tx_ring[i].ring_size = new_tx_size;
1761                adapter->rx_ring[i].ring_size = new_rx_size;
1762        }
1763}
1764
1765/* This function allows queue allocation to back off when the system is
1766 * low on memory. If there is not enough memory to allocate the I/O queues,
1767 * the driver retries with smaller queues.
1768 *
1769 * The backoff algorithm is as follows:
1770 *  1. Try to allocate the TX and RX queues.
1771 *  1.1. If successful, return success.
1772 *
1773 *  2. Halve the size of the larger of the RX and TX queues (or both if they are the same size).
1774 *
1775 *  3. If the new TX or RX size is smaller than ENA_MIN_RING_SIZE (256)
1776 *  3.1. return failure.
1777 *  4. else
1778 *  4.1. go back to 1.
1779 */
1780static int create_queues_with_size_backoff(struct ena_adapter *adapter)
1781{
1782        int rc, cur_rx_ring_size, cur_tx_ring_size;
1783        int new_rx_ring_size, new_tx_ring_size;
1784
1785        /* The current queue sizes might be smaller than the requested
1786         * ones due to past queue allocation failures.
1787         */
1788        set_io_rings_size(adapter, adapter->requested_tx_ring_size,
1789                          adapter->requested_rx_ring_size);
1790
1791        while (1) {
1792                rc = ena_setup_all_tx_resources(adapter);
1793                if (rc)
1794                        goto err_setup_tx;
1795
1796                rc = ena_create_all_io_tx_queues(adapter);
1797                if (rc)
1798                        goto err_create_tx_queues;
1799
1800                rc = ena_setup_all_rx_resources(adapter);
1801                if (rc)
1802                        goto err_setup_rx;
1803
1804                rc = ena_create_all_io_rx_queues(adapter);
1805                if (rc)
1806                        goto err_create_rx_queues;
1807
1808                return 0;
1809
1810err_create_rx_queues:
1811                ena_free_all_io_rx_resources(adapter);
1812err_setup_rx:
1813                ena_destroy_all_tx_queues(adapter);
1814err_create_tx_queues:
1815                ena_free_all_io_tx_resources(adapter);
1816err_setup_tx:
1817                if (rc != -ENOMEM) {
1818                        netif_err(adapter, ifup, adapter->netdev,
1819                                  "Queue creation failed with error code %d\n",
1820                                  rc);
1821                        return rc;
1822                }
1823
1824                cur_tx_ring_size = adapter->tx_ring[0].ring_size;
1825                cur_rx_ring_size = adapter->rx_ring[0].ring_size;
1826
1827                netif_err(adapter, ifup, adapter->netdev,
1828                          "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
1829                          cur_tx_ring_size, cur_rx_ring_size);
1830
1831                new_tx_ring_size = cur_tx_ring_size;
1832                new_rx_ring_size = cur_rx_ring_size;
1833
1834                /* Decrease the size of the larger queue, or
1835                 * decrease both if they are the same size.
1836                 */
1837                if (cur_rx_ring_size <= cur_tx_ring_size)
1838                        new_tx_ring_size = cur_tx_ring_size / 2;
1839                if (cur_rx_ring_size >= cur_tx_ring_size)
1840                        new_rx_ring_size = cur_rx_ring_size / 2;
1841
1842                if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
1843                    new_rx_ring_size < ENA_MIN_RING_SIZE) {
1844                        netif_err(adapter, ifup, adapter->netdev,
1845                                  "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
1846                                  ENA_MIN_RING_SIZE);
1847                        return rc;
1848                }
1849
1850                netif_err(adapter, ifup, adapter->netdev,
1851                          "Retrying queue creation with sizes TX=%d, RX=%d\n",
1852                          new_tx_ring_size,
1853                          new_rx_ring_size);
1854
1855                set_io_rings_size(adapter, new_tx_ring_size,
1856                                  new_rx_ring_size);
1857        }
1858}
1859
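    /* Bring the interface up: set up the I/O interrupts and NAPI contexts,
     * request the I/O IRQs, create the queues (with size backoff) and finish
     * the bring-up. Unwinds in reverse order on any failure.
     */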
1860static int ena_up(struct ena_adapter *adapter)
1861{
1862        int rc, i;
1863
1864        netdev_dbg(adapter->netdev, "%s\n", __func__);
1865
1866        ena_setup_io_intr(adapter);
1867
1868        /* The napi poll functions must be initialized before calling
1869         * request_irq() to handle a rare race where a pending interrupt
1870         * causes the ISR to fire immediately while the poll function has
1871         * not been set yet, which would lead to a NULL dereference.
1872         */
1873        ena_init_napi(adapter);
1874
1875        rc = ena_request_io_irq(adapter);
1876        if (rc)
1877                goto err_req_irq;
1878
1879        rc = create_queues_with_size_backoff(adapter);
1880        if (rc)
1881                goto err_create_queues_with_backoff;
1882
1883        rc = ena_up_complete(adapter);
1884        if (rc)
1885                goto err_up;
1886
1887        if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
1888                netif_carrier_on(adapter->netdev);
1889
1890        u64_stats_update_begin(&adapter->syncp);
1891        adapter->dev_stats.interface_up++;
1892        u64_stats_update_end(&adapter->syncp);
1893
1894        set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
1895
1896        /* Enable completion queue interrupts */
1897        for (i = 0; i < adapter->num_queues; i++)
1898                ena_unmask_interrupt(&adapter->tx_ring[i],
1899                                     &adapter->rx_ring[i]);
1900
1901        /* Schedule napi in case there are pending packets left over
1902         * from the last time napi was disabled.
1903         */
1904        for (i = 0; i < adapter->num_queues; i++)
1905                napi_schedule(&adapter->ena_napi[i].napi);
1906
1907        return rc;
1908
1909err_up:
1910        ena_destroy_all_tx_queues(adapter);
1911        ena_free_all_io_tx_resources(adapter);
1912        ena_destroy_all_rx_queues(adapter);
1913        ena_free_all_io_rx_resources(adapter);
1914err_create_queues_with_backoff:
1915        ena_free_io_irq(adapter);
1916err_req_irq:
1917        ena_del_napi(adapter);
1918
1919        return rc;
1920}
1921
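    /* Tear the interface down: stop Tx and NAPI, reset the device if a reset
     * was triggered, then destroy the I/O queues and release the IRQs,
     * buffers and ring resources.
     */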
1922static void ena_down(struct ena_adapter *adapter)
1923{
1924        netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
1925
1926        clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
1927
1928        u64_stats_update_begin(&adapter->syncp);
1929        adapter->dev_stats.interface_down++;
1930        u64_stats_update_end(&adapter->syncp);
1931
1932        netif_carrier_off(adapter->netdev);
1933        netif_tx_disable(adapter->netdev);
1934
1935        /* After this point the napi handler won't enable the tx queue */
1936        ena_napi_disable_all(adapter);
1937
1938        /* After destroying the queues there won't be any new interrupts */
1939
1940        if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
1941                int rc;
1942
1943                rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
1944                if (rc)
1945                        dev_err(&adapter->pdev->dev, "Device reset failed\n");
1946                /* stop submitting admin commands on a device that was reset */
1947                ena_com_set_admin_running_state(adapter->ena_dev, false);
1948        }
1949
1950        ena_destroy_all_io_queues(adapter);
1951
1952        ena_disable_io_intr_sync(adapter);
1953        ena_free_io_irq(adapter);
1954        ena_del_napi(adapter);
1955
1956        ena_free_all_tx_bufs(adapter);
1957        ena_free_all_rx_bufs(adapter);
1958        ena_free_all_io_tx_resources(adapter);
1959        ena_free_all_io_rx_resources(adapter);
1960}
1961
1962/* ena_open - Called when a network interface is made active
1963 * @netdev: network interface device structure
1964 *
1965 * Returns 0 on success, negative value on failure
1966 *
1967 * The open entry point is called when a network interface is made
1968 * active by the system (IFF_UP).  At this point all resources needed
1969 * for transmit and receive operations are allocated, the interrupt
1970 * handler is registered with the OS, the watchdog timer is started,
1971 * and the stack is notified that the interface is ready.
1972 */
1973static int ena_open(struct net_device *netdev)
1974{
1975        struct ena_adapter *adapter = netdev_priv(netdev);
1976        int rc;
1977
1978        /* Notify the stack of the actual queue counts. */
1979        rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues);
1980        if (rc) {
1981                netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
1982                return rc;
1983        }
1984
1985        rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues);
1986        if (rc) {
1987                netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
1988                return rc;
1989        }
1990
1991        rc = ena_up(adapter);
1992        if (rc)
1993                return rc;
1994
1995        return rc;
1996}
1997
1998/* ena_close - Disables a network interface
1999 * @netdev: network interface device structure
2000 *
2001 * Returns 0, this is not allowed to fail
2002 *
2003 * The close entry point is called when an interface is de-activated
2004 * by the OS.  The hardware is still under the drivers control, but
2005 * needs to be disabled.  A global MAC reset is issued to stop the
2006 * hardware, and all transmit and receive resources are freed.
2007 */
2008static int ena_close(struct net_device *netdev)
2009{
2010        struct ena_adapter *adapter = netdev_priv(netdev);
2011
2012        netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
2013
2014        if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
2015                return 0;
2016
2017        if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2018                ena_down(adapter);
2019
2020        /* Check the device status and issue a reset if needed */
2021        check_for_admin_com_state(adapter);
2022        if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2023                netif_err(adapter, ifdown, adapter->netdev,
2024                          "Destroy failure, restarting device\n");
2025                ena_dump_stats_to_dmesg(adapter);
2026                /* rtnl lock already obtained in dev_ioctl() layer */
2027                ena_destroy_device(adapter, false);
2028                ena_restore_device(adapter);
2029        }
2030
2031        return 0;
2032}
2033
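    /* Apply new Tx/Rx ring sizes: close the interface, store the requested
     * sizes, re-initialize the rings and bring the interface back up if it
     * was up before.
     */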
2034int ena_update_queue_sizes(struct ena_adapter *adapter,
2035                           u32 new_tx_size,
2036                           u32 new_rx_size)
2037{
2038        bool dev_up;
2039
2040        dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2041        ena_close(adapter->netdev);
2042        adapter->requested_tx_ring_size = new_tx_size;
2043        adapter->requested_rx_ring_size = new_rx_size;
2044        ena_init_io_rings(adapter);
2045        return dev_up ? ena_up(adapter) : 0;
2046}
2047
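    /* Translate the skb checksum/TSO offload requests into the ena_com Tx
     * context: L3/L4 protocols, header lengths and offsets, MSS and the
     * checksum/TSO enable bits.
     */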
2048static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
2049{
2050        u32 mss = skb_shinfo(skb)->gso_size;
2051        struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
2052        u8 l4_protocol = 0;
2053
2054        if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
2055                ena_tx_ctx->l4_csum_enable = 1;
2056                if (mss) {
2057                        ena_tx_ctx->tso_enable = 1;
2058                        ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
2059                        ena_tx_ctx->l4_csum_partial = 0;
2060                } else {
2061                        ena_tx_ctx->tso_enable = 0;
2062                        ena_meta->l4_hdr_len = 0;
2063                        ena_tx_ctx->l4_csum_partial = 1;
2064                }
2065
2066                switch (ip_hdr(skb)->version) {
2067                case IPVERSION:
2068                        ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2069                        if (ip_hdr(skb)->frag_off & htons(IP_DF))
2070                                ena_tx_ctx->df = 1;
2071                        if (mss)
2072                                ena_tx_ctx->l3_csum_enable = 1;
2073                        l4_protocol = ip_hdr(skb)->protocol;
2074                        break;
2075                case 6:
2076                        ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2077                        l4_protocol = ipv6_hdr(skb)->nexthdr;
2078                        break;
2079                default:
2080                        break;
2081                }
2082
2083                if (l4_protocol == IPPROTO_TCP)
2084                        ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2085                else
2086                        ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2087
2088                ena_meta->mss = mss;
2089                ena_meta->l3_hdr_len = skb_network_header_len(skb);
2090                ena_meta->l3_hdr_offset = skb_network_offset(skb);
2091                ena_tx_ctx->meta_valid = 1;
2092
2093        } else {
2094                ena_tx_ctx->meta_valid = 0;
2095        }
2096}
2097
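    /* Linearize the skb if its fragments would not fit in the ring's
     * scatter-gather list (sgl_size).
     */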
2098static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
2099                                       struct sk_buff *skb)
2100{
2101        int num_frags, header_len, rc;
2102
2103        num_frags = skb_shinfo(skb)->nr_frags;
2104        header_len = skb_headlen(skb);
2105
2106        if (num_frags < tx_ring->sgl_size)
2107                return 0;
2108
2109        if ((num_frags == tx_ring->sgl_size) &&
2110            (header_len < tx_ring->tx_max_header_size))
2111                return 0;
2112
2113        u64_stats_update_begin(&tx_ring->syncp);
2114        tx_ring->tx_stats.linearize++;
2115        u64_stats_update_end(&tx_ring->syncp);
2116
2117        rc = skb_linearize(skb);
2118        if (unlikely(rc)) {
2119                u64_stats_update_begin(&tx_ring->syncp);
2120                tx_ring->tx_stats.linearize_failed++;
2121                u64_stats_update_end(&tx_ring->syncp);
2122        }
2123
2124        return rc;
2125}
2126
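    /* DMA-map the skb for transmission. In LLQ mode, up to tx_max_header_size
     * bytes of the header are provided separately as a push header (copied to
     * an intermediate buffer if not contiguous); the rest of the linear data
     * and all fragments are mapped into tx_info->bufs.
     */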
2127static int ena_tx_map_skb(struct ena_ring *tx_ring,
2128                          struct ena_tx_buffer *tx_info,
2129                          struct sk_buff *skb,
2130                          void **push_hdr,
2131                          u16 *header_len)
2132{
2133        struct ena_adapter *adapter = tx_ring->adapter;
2134        struct ena_com_buf *ena_buf;
2135        dma_addr_t dma;
2136        u32 skb_head_len, frag_len, last_frag;
2137        u16 push_len = 0;
2138        u16 delta = 0;
2139        int i = 0;
2140
2141        skb_head_len = skb_headlen(skb);
2142        tx_info->skb = skb;
2143        ena_buf = tx_info->bufs;
2144
2145        if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2146                /* When the device is in LLQ mode, the driver copies
2147                 * the header into the device memory space.
2148                 * The ena_com layer assumes the header is in a linear
2149                 * memory space.
2150                 * This assumption might be wrong since part of the header
2151                 * can be in the fragmented buffers.
2152                 * Use skb_header_pointer() to make sure the header is in a
2153                 * linear memory space.
2154                 */
2155
2156                push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
2157                *push_hdr = skb_header_pointer(skb, 0, push_len,
2158                                               tx_ring->push_buf_intermediate_buf);
2159                *header_len = push_len;
2160                if (unlikely(skb->data != *push_hdr)) {
2161                        u64_stats_update_begin(&tx_ring->syncp);
2162                        tx_ring->tx_stats.llq_buffer_copy++;
2163                        u64_stats_update_end(&tx_ring->syncp);
2164
2165                        delta = push_len - skb_head_len;
2166                }
2167        } else {
2168                *push_hdr = NULL;
2169                *header_len = min_t(u32, skb_head_len,
2170                                    tx_ring->tx_max_header_size);
2171        }
2172
2173        netif_dbg(adapter, tx_queued, adapter->netdev,
2174                  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2175                  *push_hdr, push_len);
2176
2177        if (skb_head_len > push_len) {
2178                dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2179                                     skb_head_len - push_len, DMA_TO_DEVICE);
2180                if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2181                        goto error_report_dma_error;
2182
2183                ena_buf->paddr = dma;
2184                ena_buf->len = skb_head_len - push_len;
2185
2186                ena_buf++;
2187                tx_info->num_of_bufs++;
2188                tx_info->map_linear_data = 1;
2189        } else {
2190                tx_info->map_linear_data = 0;
2191        }
2192
2193        last_frag = skb_shinfo(skb)->nr_frags;
2194
2195        for (i = 0; i < last_frag; i++) {
2196                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2197
2198                frag_len = skb_frag_size(frag);
2199
2200                if (unlikely(delta >= frag_len)) {
2201                        delta -= frag_len;
2202                        continue;
2203                }
2204
2205                dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
2206                                       frag_len - delta, DMA_TO_DEVICE);
2207                if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2208                        goto error_report_dma_error;
2209
2210                ena_buf->paddr = dma;
2211                ena_buf->len = frag_len - delta;
2212                ena_buf++;
2213                tx_info->num_of_bufs++;
2214                delta = 0;
2215        }
2216
2217        return 0;
2218
2219error_report_dma_error:
2220        u64_stats_update_begin(&tx_ring->syncp);
2221        tx_ring->tx_stats.dma_mapping_err++;
2222        u64_stats_update_end(&tx_ring->syncp);
2223        netdev_warn(adapter->netdev, "failed to map skb\n");
2224
2225        tx_info->skb = NULL;
2226
2227        tx_info->num_of_bufs += i;
2228        ena_unmap_tx_skb(tx_ring, tx_info);
2229
2230        return -EINVAL;
2231}
2232
2233/* Called with netif_tx_lock. */
2234static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
2235{
2236        struct ena_adapter *adapter = netdev_priv(dev);
2237        struct ena_tx_buffer *tx_info;
2238        struct ena_com_tx_ctx ena_tx_ctx;
2239        struct ena_ring *tx_ring;
2240        struct netdev_queue *txq;
2241        void *push_hdr;
2242        u16 next_to_use, req_id, header_len;
2243        int qid, rc, nb_hw_desc;
2244
2245        netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
2246        /* Determine which tx ring the skb will be placed on */
2247        qid = skb_get_queue_mapping(skb);
2248        tx_ring = &adapter->tx_ring[qid];
2249        txq = netdev_get_tx_queue(dev, qid);
2250
2251        rc = ena_check_and_linearize_skb(tx_ring, skb);
2252        if (unlikely(rc))
2253                goto error_drop_packet;
2254
2255        skb_tx_timestamp(skb);
2256
2257        next_to_use = tx_ring->next_to_use;
2258        req_id = tx_ring->free_ids[next_to_use];
2259        tx_info = &tx_ring->tx_buffer_info[req_id];
2260        tx_info->num_of_bufs = 0;
2261
2262        WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
2263
2264        rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
2265        if (unlikely(rc))
2266                goto error_drop_packet;
2267
2268        memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
2269        ena_tx_ctx.ena_bufs = tx_info->bufs;
2270        ena_tx_ctx.push_header = push_hdr;
2271        ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
2272        ena_tx_ctx.req_id = req_id;
2273        ena_tx_ctx.header_len = header_len;
2274
2275        /* set flags and meta data */
2276        ena_tx_csum(&ena_tx_ctx, skb);
2277
2278        if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) {
2279                netif_dbg(adapter, tx_queued, dev,
2280                          "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
2281                          qid);
2282                ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
2283        }
2284
2285        /* Prepare the packet's descriptors for the dma engine */
2286        rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
2287                                &nb_hw_desc);
2288
2289        /* ena_com_prepare_tx() can't fail due to a tx queue overflow,
2290         * since the number of free descriptors in the queue is checked
2291         * after sending the previous packet. In case there isn't enough
2292         * space in the queue for the next packet, the queue is stopped
2293         * until there is again enough available space in it.
2294         * All other failure reasons of ena_com_prepare_tx() are fatal
2295         * and therefore require a device reset.
2296         */
2297        if (unlikely(rc)) {
2298                netif_err(adapter, tx_queued, dev,
2299                          "failed to prepare tx bufs\n");
2300                u64_stats_update_begin(&tx_ring->syncp);
2301                tx_ring->tx_stats.prepare_ctx_err++;
2302                u64_stats_update_end(&tx_ring->syncp);
2303                adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE;
2304                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2305                goto error_unmap_dma;
2306        }
2307
2308        netdev_tx_sent_queue(txq, skb->len);
2309
2310        u64_stats_update_begin(&tx_ring->syncp);
2311        tx_ring->tx_stats.cnt++;
2312        tx_ring->tx_stats.bytes += skb->len;
2313        u64_stats_update_end(&tx_ring->syncp);
2314
2315        tx_info->tx_descs = nb_hw_desc;
2316        tx_info->last_jiffies = jiffies;
2317        tx_info->print_once = 0;
2318
2319        tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
2320                tx_ring->ring_size);
2321
2322        /* Stop the queue when no more space is available. The packet can need
2323         * up to sgl_size + 2 descriptors: one for the meta descriptor and one
2324         * for the header (if the header is larger than tx_max_header_size).
2325         */
2326        if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
2327                                                   tx_ring->sgl_size + 2))) {
2328                netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
2329                          __func__, qid);
2330
2331                netif_tx_stop_queue(txq);
2332                u64_stats_update_begin(&tx_ring->syncp);
2333                tx_ring->tx_stats.queue_stop++;
2334                u64_stats_update_end(&tx_ring->syncp);
2335
2336                /* There is a rare condition where this function decides to
2337                 * stop the queue but meanwhile clean_tx_irq updates
2338                 * next_to_completion and terminates.
2339                 * The queue would then remain stopped forever.
2340                 * To solve this issue add an smp_mb() to make sure that the
2341                 * netif_tx_stop_queue() write is visible before checking if
2342                 * there is additional space in the queue.
2343                 */
2344                smp_mb();
2345
2346                if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
2347                                                 ENA_TX_WAKEUP_THRESH)) {
2348                        netif_tx_wake_queue(txq);
2349                        u64_stats_update_begin(&tx_ring->syncp);
2350                        tx_ring->tx_stats.queue_wakeup++;
2351                        u64_stats_update_end(&tx_ring->syncp);
2352                }
2353        }
2354
2355        if (netif_xmit_stopped(txq) || !netdev_xmit_more()) {
2356                /* trigger the dma engine. ena_com_write_sq_doorbell()
2357                 * has a mb
2358                 */
2359                ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
2360                u64_stats_update_begin(&tx_ring->syncp);
2361                tx_ring->tx_stats.doorbells++;
2362                u64_stats_update_end(&tx_ring->syncp);
2363        }
2364
2365        return NETDEV_TX_OK;
2366
2367error_unmap_dma:
2368        ena_unmap_tx_skb(tx_ring, tx_info);
2369        tx_info->skb = NULL;
2370
2371error_drop_packet:
2372        dev_kfree_skb(skb);
2373        return NETDEV_TX_OK;
2374}
2375
2376static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
2377                            struct net_device *sb_dev)
2378{
2379        u16 qid;
2380        /* This is useful for in-kernel network services that want to loop an
2381         * incoming skb's rx queue back to the same tx queue. For normal
2382         * user-generated traffic this branch is most likely not taken.
2383         */
2384        if (skb_rx_queue_recorded(skb))
2385                qid = skb_get_rx_queue(skb);
2386        else
2387                qid = netdev_pick_tx(dev, skb, NULL);
2388
2389        return qid;
2390}
2391
2392static void ena_config_host_info(struct ena_com_dev *ena_dev,
2393                                 struct pci_dev *pdev)
2394{
2395        struct ena_admin_host_info *host_info;
2396        int rc;
2397
2398        /* Allocate only the host info */
2399        rc = ena_com_allocate_host_info(ena_dev);
2400        if (rc) {
2401                pr_err("Cannot allocate host info\n");
2402                return;
2403        }
2404
2405        host_info = ena_dev->host_attr.host_info;
2406
2407        host_info->bdf = (pdev->bus->number << 8) | pdev->devfn;
2408        host_info->os_type = ENA_ADMIN_OS_LINUX;
2409        host_info->kernel_ver = LINUX_VERSION_CODE;
2410        strlcpy(host_info->kernel_ver_str, utsname()->version,
2411                sizeof(host_info->kernel_ver_str) - 1);
2412        host_info->os_dist = 0;
2413        strncpy(host_info->os_dist_str, utsname()->release,
2414                sizeof(host_info->os_dist_str) - 1);
2415        host_info->driver_version =
2416                (DRV_MODULE_VER_MAJOR) |
2417                (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2418                (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
2419                ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
2420        host_info->num_cpus = num_online_cpus();
2421
2422        rc = ena_com_set_host_attributes(ena_dev);
2423        if (rc) {
2424                if (rc == -EOPNOTSUPP)
2425                        pr_warn("Cannot set host attributes\n");
2426                else
2427                        pr_err("Cannot set host attributes\n");
2428
2429                goto err;
2430        }
2431
2432        return;
2433
2434err:
2435        ena_com_delete_host_info(ena_dev);
2436}
2437
2438static void ena_config_debug_area(struct ena_adapter *adapter)
2439{
2440        u32 debug_area_size;
2441        int rc, ss_count;
2442
2443        ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
2444        if (ss_count <= 0) {
2445                netif_err(adapter, drv, adapter->netdev,
2446                          "SS count is not positive\n");
2447                return;
2448        }
2449
2450        /* Allocate 32 bytes for each string and 64 bits for each value */
2451        debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
2452
2453        rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
2454        if (rc) {
2455                pr_err("Cannot allocate debug area\n");
2456                return;
2457        }
2458
2459        rc = ena_com_set_host_attributes(adapter->ena_dev);
2460        if (rc) {
2461                if (rc == -EOPNOTSUPP)
2462                        netif_warn(adapter, drv, adapter->netdev,
2463                                   "Cannot set host attributes\n");
2464                else
2465                        netif_err(adapter, drv, adapter->netdev,
2466                                  "Cannot set host attributes\n");
2467                goto err;
2468        }
2469
2470        return;
2471err:
2472        ena_com_delete_debug_area(adapter->ena_dev);
2473}
2474
2475static void ena_get_stats64(struct net_device *netdev,
2476                            struct rtnl_link_stats64 *stats)
2477{
2478        struct ena_adapter *adapter = netdev_priv(netdev);
2479        struct ena_ring *rx_ring, *tx_ring;
2480        unsigned int start;
2481        u64 rx_drops;
2482        int i;
2483
2484        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2485                return;
2486
2487        for (i = 0; i < adapter->num_queues; i++) {
2488                u64 bytes, packets;
2489
2490                tx_ring = &adapter->tx_ring[i];
2491
2492                do {
2493                        start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
2494                        packets = tx_ring->tx_stats.cnt;
2495                        bytes = tx_ring->tx_stats.bytes;
2496                } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
2497
2498                stats->tx_packets += packets;
2499                stats->tx_bytes += bytes;
2500
2501                rx_ring = &adapter->rx_ring[i];
2502
2503                do {
2504                        start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
2505                        packets = rx_ring->rx_stats.cnt;
2506                        bytes = rx_ring->rx_stats.bytes;
2507                } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
2508
2509                stats->rx_packets += packets;
2510                stats->rx_bytes += bytes;
2511        }
2512
2513        do {
2514                start = u64_stats_fetch_begin_irq(&adapter->syncp);
2515                rx_drops = adapter->dev_stats.rx_drops;
2516        } while (u64_stats_fetch_retry_irq(&adapter->syncp, start));
2517
2518        stats->rx_dropped = rx_drops;
2519
2520        stats->multicast = 0;
2521        stats->collisions = 0;
2522
2523        stats->rx_length_errors = 0;
2524        stats->rx_crc_errors = 0;
2525        stats->rx_frame_errors = 0;
2526        stats->rx_fifo_errors = 0;
2527        stats->rx_missed_errors = 0;
2528        stats->tx_window_errors = 0;
2529
2530        stats->rx_errors = 0;
2531        stats->tx_errors = 0;
2532}
2533
2534static const struct net_device_ops ena_netdev_ops = {
2535        .ndo_open               = ena_open,
2536        .ndo_stop               = ena_close,
2537        .ndo_start_xmit         = ena_start_xmit,
2538        .ndo_select_queue       = ena_select_queue,
2539        .ndo_get_stats64        = ena_get_stats64,
2540        .ndo_tx_timeout         = ena_tx_timeout,
2541        .ndo_change_mtu         = ena_change_mtu,
2542        .ndo_set_mac_address    = NULL,
2543        .ndo_validate_addr      = eth_validate_addr,
2544};
2545
2546static int ena_device_validate_params(struct ena_adapter *adapter,
2547                                      struct ena_com_dev_get_features_ctx *get_feat_ctx)
2548{
2549        struct net_device *netdev = adapter->netdev;
2550        int rc;
2551
2552        rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
2553                              adapter->mac_addr);
2554        if (!rc) {
2555                netif_err(adapter, drv, netdev,
2556                          "Error, mac addresses are different\n");
2557                return -EINVAL;
2558        }
2559
2560        if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
2561                netif_err(adapter, drv, netdev,
2562                          "Error, device max mtu is smaller than netdev MTU\n");
2563                return -EINVAL;
2564        }
2565
2566        return 0;
2567}
2568
2569static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
2570                           struct ena_com_dev_get_features_ctx *get_feat_ctx,
2571                           bool *wd_state)
2572{
2573        struct device *dev = &pdev->dev;
2574        bool readless_supported;
2575        u32 aenq_groups;
2576        int dma_width;
2577        int rc;
2578
2579        rc = ena_com_mmio_reg_read_request_init(ena_dev);
2580        if (rc) {
2581                dev_err(dev, "failed to init mmio read less\n");
2582                return rc;
2583        }
2584
2585        /* The PCIe configuration space revision id indicates whether mmio
2586         * register read is disabled.
2587         */
2588        readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
2589        ena_com_set_mmio_read_mode(ena_dev, readless_supported);
2590
2591        rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
2592        if (rc) {
2593                dev_err(dev, "Can not reset device\n");
2594                goto err_mmio_read_less;
2595        }
2596
2597        rc = ena_com_validate_version(ena_dev);
2598        if (rc) {
2599                dev_err(dev, "device version is too low\n");
2600                goto err_mmio_read_less;
2601        }
2602
2603        dma_width = ena_com_get_dma_width(ena_dev);
2604        if (dma_width < 0) {
2605                dev_err(dev, "Invalid dma width value %d", dma_width);
2606                rc = dma_width;
2607                goto err_mmio_read_less;
2608        }
2609
2610        rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
2611        if (rc) {
2612                dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc);
2613                goto err_mmio_read_less;
2614        }
2615
2616        rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
2617        if (rc) {
2618                dev_err(dev, "pci_set_consistent_dma_mask failed 0x%x\n",
2619                        rc);
2620                goto err_mmio_read_less;
2621        }
2622
2623        /* ENA admin level init */
2624        rc = ena_com_admin_init(ena_dev, &aenq_handlers);
2625        if (rc) {
2626                dev_err(dev,
2627                        "Can not initialize ena admin queue with device\n");
2628                goto err_mmio_read_less;
2629        }
2630
2631        /* To enable the msix interrupts the driver needs to know the number
2632         * of queues, so it uses polling mode to retrieve this
2633         * information.
2634         */
2635        ena_com_set_admin_polling_mode(ena_dev, true);
2636
2637        ena_config_host_info(ena_dev, pdev);
2638
2639        /* Get Device Attributes */
2640        rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
2641        if (rc) {
2642                dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
2643                goto err_admin_init;
2644        }
2645
2646        /* Try to turn on all the available aenq groups */
2647        aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
2648                BIT(ENA_ADMIN_FATAL_ERROR) |
2649                BIT(ENA_ADMIN_WARNING) |
2650                BIT(ENA_ADMIN_NOTIFICATION) |
2651                BIT(ENA_ADMIN_KEEP_ALIVE);
2652
2653        aenq_groups &= get_feat_ctx->aenq.supported_groups;
2654
2655        rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
2656        if (rc) {
2657                dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
2658                goto err_admin_init;
2659        }
2660
2661        *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
2662
2663        return 0;
2664
2665err_admin_init:
2666        ena_com_delete_host_info(ena_dev);
2667        ena_com_admin_destroy(ena_dev);
2668err_mmio_read_less:
2669        ena_com_mmio_reg_read_request_destroy(ena_dev);
2670
2671        return rc;
2672}
2673
2674static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
2675                                                    int io_vectors)
2676{
2677        struct ena_com_dev *ena_dev = adapter->ena_dev;
2678        struct device *dev = &adapter->pdev->dev;
2679        int rc;
2680
2681        rc = ena_enable_msix(adapter, io_vectors);
2682        if (rc) {
2683                dev_err(dev, "Can not reserve msix vectors\n");
2684                return rc;
2685        }
2686
2687        ena_setup_mgmnt_intr(adapter);
2688
2689        rc = ena_request_mgmnt_irq(adapter);
2690        if (rc) {
2691                dev_err(dev, "Can not setup management interrupts\n");
2692                goto err_disable_msix;
2693        }
2694
2695        ena_com_set_admin_polling_mode(ena_dev, false);
2696
2697        ena_com_admin_aenq_enable(ena_dev);
2698
2699        return 0;
2700
2701err_disable_msix:
2702        ena_disable_msix(adapter);
2703
2704        return rc;
2705}
2706
2707static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
2708{
2709        struct net_device *netdev = adapter->netdev;
2710        struct ena_com_dev *ena_dev = adapter->ena_dev;
2711        bool dev_up;
2712
2713        if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
2714                return;
2715
2716        netif_carrier_off(netdev);
2717
2718        del_timer_sync(&adapter->timer_service);
2719
2720        dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2721        adapter->dev_up_before_reset = dev_up;
2722        if (!graceful)
2723                ena_com_set_admin_running_state(ena_dev, false);
2724
2725        if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2726                ena_down(adapter);
2727
2728        /* Stop the device from sending AENQ events (if the reset flag is set
2729         * and the device is up, ena_down() has already reset the device).
2730         */
2731        if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
2732                ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2733
2734        ena_free_mgmnt_irq(adapter);
2735
2736        ena_disable_msix(adapter);
2737
2738        ena_com_abort_admin_commands(ena_dev);
2739
2740        ena_com_wait_for_abort_completion(ena_dev);
2741
2742        ena_com_admin_destroy(ena_dev);
2743
2744        ena_com_mmio_reg_read_request_destroy(ena_dev);
2745
2746        adapter->reset_reason = ENA_REGS_RESET_NORMAL;
2747
2748        clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2749        clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
2750}
2751
2752static int ena_restore_device(struct ena_adapter *adapter)
2753{
2754        struct ena_com_dev_get_features_ctx get_feat_ctx;
2755        struct ena_com_dev *ena_dev = adapter->ena_dev;
2756        struct pci_dev *pdev = adapter->pdev;
2757        bool wd_state;
2758        int rc;
2759
2760        set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
2761        rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
2762        if (rc) {
2763                dev_err(&pdev->dev, "Can not initialize device\n");
2764                goto err;
2765        }
2766        adapter->wd_state = wd_state;
2767
2768        rc = ena_device_validate_params(adapter, &get_feat_ctx);
2769        if (rc) {
2770                dev_err(&pdev->dev, "Validation of device parameters failed\n");
2771                goto err_device_destroy;
2772        }
2773
2774        rc = ena_enable_msix_and_set_admin_interrupts(adapter,
2775                                                      adapter->num_queues);
2776        if (rc) {
2777                dev_err(&pdev->dev, "Enable MSI-X failed\n");
2778                goto err_device_destroy;
2779        }
2780        /* If the interface was up before the reset, bring it back up */
2781        if (adapter->dev_up_before_reset) {
2782                rc = ena_up(adapter);
2783                if (rc) {
2784                        dev_err(&pdev->dev, "Failed to create I/O queues\n");
2785                        goto err_disable_msix;
2786                }
2787        }
2788
2789        set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
2790
2791        clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
2792        if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
2793                netif_carrier_on(adapter->netdev);
2794
2795        mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
2796        dev_err(&pdev->dev,
2797                "Device reset completed successfully, Driver info: %s\n",
2798                version);
2799
2800        return rc;
2801err_disable_msix:
2802        ena_free_mgmnt_irq(adapter);
2803        ena_disable_msix(adapter);
2804err_device_destroy:
2805        ena_com_abort_admin_commands(ena_dev);
2806        ena_com_wait_for_abort_completion(ena_dev);
2807        ena_com_admin_destroy(ena_dev);
2808        ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
2809        ena_com_mmio_reg_read_request_destroy(ena_dev);
2810err:
2811        clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
2812        clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
2813        dev_err(&pdev->dev,
2814                "Reset attempt failed. Can not reset the device\n");
2815
2816        return rc;
2817}
2818
2819static void ena_fw_reset_device(struct work_struct *work)
2820{
2821        struct ena_adapter *adapter =
2822                container_of(work, struct ena_adapter, reset_task);
2823        struct pci_dev *pdev = adapter->pdev;
2824
2825        if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2826                dev_err(&pdev->dev,
2827                        "device reset scheduled while reset bit is off\n");
2828                return;
2829        }
2830        rtnl_lock();
2831        ena_destroy_device(adapter, false);
2832        ena_restore_device(adapter);
2833        rtnl_unlock();
2834}
2835
2836static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
2837                                        struct ena_ring *rx_ring)
2838{
2839        if (likely(rx_ring->first_interrupt))
2840                return 0;
2841
2842        if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
2843                return 0;
2844
2845        rx_ring->no_interrupt_event_cnt++;
2846
2847        if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
2848                netif_err(adapter, rx_err, adapter->netdev,
2849                          "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
2850                          rx_ring->qid);
2851                adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
2852                smp_mb__before_atomic();
2853                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2854                return -EIO;
2855        }
2856
2857        return 0;
2858}
2859
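    /* Scan a Tx ring for packets whose completion has been outstanding longer
     * than missing_tx_completion_to and schedule a device reset when the
     * number of missed completions exceeds the configured threshold.
     */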
2860static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
2861                                          struct ena_ring *tx_ring)
2862{
2863        struct ena_tx_buffer *tx_buf;
2864        unsigned long last_jiffies;
2865        u32 missed_tx = 0;
2866        int i, rc = 0;
2867
2868        for (i = 0; i < tx_ring->ring_size; i++) {
2869                tx_buf = &tx_ring->tx_buffer_info[i];
2870                last_jiffies = tx_buf->last_jiffies;
2871
2872                if (last_jiffies == 0)
2873                        /* no pending Tx at this location */
2874                        continue;
2875
2876                if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies +
2877                             2 * adapter->missing_tx_completion_to))) {
2878                        /* If the interrupt is still not received after the
2879                         * grace period, schedule a reset.
2880                         */
2881                        netif_err(adapter, tx_err, adapter->netdev,
2882                                  "Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
2883                                  tx_ring->qid);
2884                        adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
2885                        smp_mb__before_atomic();
2886                        set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2887                        return -EIO;
2888                }
2889
2890                if (unlikely(time_is_before_jiffies(last_jiffies +
2891                                adapter->missing_tx_completion_to))) {
2892                        if (!tx_buf->print_once)
2893                                netif_notice(adapter, tx_err, adapter->netdev,
2894                                             "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
2895                                             tx_ring->qid, i);
2896
2897                        tx_buf->print_once = 1;
2898                        missed_tx++;
2899                }
2900        }
2901
2902        if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
2903                netif_err(adapter, tx_err, adapter->netdev,
2904                          "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
2905                          missed_tx,
2906                          adapter->missing_tx_completion_threshold);
2907                adapter->reset_reason =
2908                        ENA_REGS_RESET_MISS_TX_CMPL;
2909                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
2910                rc = -EIO;
2911        }
2912
2913        u64_stats_update_begin(&tx_ring->syncp);
2914        tx_ring->tx_stats.missed_tx = missed_tx;
2915        u64_stats_update_end(&tx_ring->syncp);
2916
2917        return rc;
2918}
2919
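    /* Check up to ENA_MONITORED_TX_QUEUES Tx/Rx queue pairs per invocation for
     * missing Tx completions and missing Rx interrupts, resuming from
     * last_monitored_tx_qid on the next call.
     */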
2920static void check_for_missing_completions(struct ena_adapter *adapter)
2921{
2922        struct ena_ring *tx_ring;
2923        struct ena_ring *rx_ring;
2924        int i, budget, rc;
2925
2926        /* Make sure the driver doesn't turn the device off in another process */
2927        smp_rmb();
2928
2929        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2930                return;
2931
2932        if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
2933                return;
2934
2935        if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
2936                return;
2937
2938        budget = ENA_MONITORED_TX_QUEUES;
2939
2940        for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
2941                tx_ring = &adapter->tx_ring[i];
2942                rx_ring = &adapter->rx_ring[i];
2943
2944                rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
2945                if (unlikely(rc))
2946                        return;
2947
2948                rc = check_for_rx_interrupt_queue(adapter, rx_ring);
2949                if (unlikely(rc))
2950                        return;
2951
2952                budget--;
2953                if (!budget)
2954                        break;
2955        }
2956
2957        adapter->last_monitored_tx_qid = i % adapter->num_queues;
2958}
2959
2960/* trigger napi schedule after 2 consecutive detections */
2961#define EMPTY_RX_REFILL 2
2962/* For the rare case where the device runs out of Rx descriptors and the
2963 * napi handler failed to refill new Rx descriptors (due to a lack of memory,
2964 * for example).
2965 * This case will lead to a deadlock:
2966 * the device won't send interrupts since all the new Rx packets will be dropped,
2967 * and the napi handler won't allocate new Rx descriptors, so the device won't
2968 * be able to send new packets.
2969 *
2970 * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
2971 * It is recommended to have at least 512MB, with a minimum of 128MB for
2972 * constrained environments.
2973 *
2974 * When such a situation is detected - reschedule napi.
2975 */
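/* Note: the recommendation above refers to vm.min_free_kbytes; it can be
 * applied at runtime, e.g. "sysctl -w vm.min_free_kbytes=131072" for the
 * 128MB minimum (value shown purely as an illustration).
 */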
2976static void check_for_empty_rx_ring(struct ena_adapter *adapter)
2977{
2978        struct ena_ring *rx_ring;
2979        int i, refill_required;
2980
2981        if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2982                return;
2983
2984        if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
2985                return;
2986
2987        for (i = 0; i < adapter->num_queues; i++) {
2988                rx_ring = &adapter->rx_ring[i];
2989
2990                refill_required =
2991                        ena_com_free_desc(rx_ring->ena_com_io_sq);
2992                if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
2993                        rx_ring->empty_rx_queue++;
2994
2995                        if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
2996                                u64_stats_update_begin(&rx_ring->syncp);
2997                                rx_ring->rx_stats.empty_rx_ring++;
2998                                u64_stats_update_end(&rx_ring->syncp);
2999
3000                                netif_err(adapter, drv, adapter->netdev,
3001                                          "trigger refill for ring %d\n", i);
3002
3003                                napi_schedule(rx_ring->napi);
3004                                rx_ring->empty_rx_queue = 0;
3005                        }
3006                } else {
3007                        rx_ring->empty_rx_queue = 0;
3008                }
3009        }
3010}
3011
3012/* Check for keep alive expiration */
3013static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3014{
3015        unsigned long keep_alive_expired;
3016
3017        if (!adapter->wd_state)
3018                return;
3019
3020        if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3021                return;
3022
3023        keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies +
3024                                           adapter->keep_alive_timeout);
3025        if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
3026                netif_err(adapter, drv, adapter->netdev,
3027                          "Keep alive watchdog timeout.\n");
3028                u64_stats_update_begin(&adapter->syncp);
3029                adapter->dev_stats.wd_expired++;
3030                u64_stats_update_end(&adapter->syncp);
3031                adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
3032                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3033        }
3034}
3035
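/* Verify that the device's admin queue is still in running state; if it
 * is not, record the event in the stats and request a device reset.
 */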
3036static void check_for_admin_com_state(struct ena_adapter *adapter)
3037{
3038        if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
3039                netif_err(adapter, drv, adapter->netdev,
3040                          "ENA admin queue is not in running state!\n");
3041                u64_stats_update_begin(&adapter->syncp);
3042                adapter->dev_stats.admin_q_pause++;
3043                u64_stats_update_end(&adapter->syncp);
3044                adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
3045                set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3046        }
3047}
3048
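/* Apply the timeout/threshold hints reported by the device: admin and MMIO
 * read timeouts, missing-Tx-completion settings, and the netdev/keep-alive
 * watchdog timeouts. Millisecond hints are converted to jiffies where the
 * consumer expects jiffies.
 */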
3049static void ena_update_hints(struct ena_adapter *adapter,
3050                             struct ena_admin_ena_hw_hints *hints)
3051{
3052        struct net_device *netdev = adapter->netdev;
3053
3054        if (hints->admin_completion_tx_timeout)
3055                adapter->ena_dev->admin_queue.completion_timeout =
3056                        hints->admin_completion_tx_timeout * 1000;
3057
3058        if (hints->mmio_read_timeout)
3059                /* convert to usec */
3060                adapter->ena_dev->mmio_read.reg_read_to =
3061                        hints->mmio_read_timeout * 1000;
3062
3063        if (hints->missed_tx_completion_count_threshold_to_reset)
3064                adapter->missing_tx_completion_threshold =
3065                        hints->missed_tx_completion_count_threshold_to_reset;
3066
3067        if (hints->missing_tx_completion_timeout) {
3068                if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3069                        adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
3070                else
3071                        adapter->missing_tx_completion_to =
3072                                msecs_to_jiffies(hints->missing_tx_completion_timeout);
3073        }
3074
3075        if (hints->netdev_wd_timeout)
3076                netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
3077
3078        if (hints->driver_watchdog_timeout) {
3079                if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3080                        adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3081                else
3082                        adapter->keep_alive_timeout =
3083                                msecs_to_jiffies(hints->driver_watchdog_timeout);
3084        }
3085}
3086
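/* Copy the netdev feature mask into the host-info structure as two 32-bit
 * words (low and high halves of the 64-bit mask).
 */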
3087static void ena_update_host_info(struct ena_admin_host_info *host_info,
3088                                 struct net_device *netdev)
3089{
3090        host_info->supported_network_features[0] =
3091                netdev->features & GENMASK_ULL(31, 0);
3092        host_info->supported_network_features[1] =
3093                (netdev->features & GENMASK_ULL(63, 32)) >> 32;
3094}
3095
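/* Periodic (1 Hz) watchdog: run the keep-alive, admin-queue, missing-Tx-
 * completion and empty-Rx-ring checks, refresh the host info and debug
 * area, then either queue the reset task or re-arm the timer.
 */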
3096static void ena_timer_service(struct timer_list *t)
3097{
3098        struct ena_adapter *adapter = from_timer(adapter, t, timer_service);
3099        u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
3100        struct ena_admin_host_info *host_info =
3101                adapter->ena_dev->host_attr.host_info;
3102
3103        check_for_missing_keep_alive(adapter);
3104
3105        check_for_admin_com_state(adapter);
3106
3107        check_for_missing_completions(adapter);
3108
3109        check_for_empty_rx_ring(adapter);
3110
3111        if (debug_area)
3112                ena_dump_stats_to_buf(adapter, debug_area);
3113
3114        if (host_info)
3115                ena_update_host_info(host_info, adapter->netdev);
3116
3117        if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3118                netif_err(adapter, drv, adapter->netdev,
3119                          "Trigger reset is on\n");
3120                ena_dump_stats_to_dmesg(adapter);
3121                queue_work(ena_wq, &adapter->reset_task);
3122                return;
3123        }
3124
3125        /* Reset the timer */
3126        mod_timer(&adapter->timer_service, jiffies + HZ);
3127}
3128
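/* The number of IO queues is bounded by the number of online CPUs,
 * ENA_MAX_NUM_IO_QUEUES, the device's Rx/Tx SQ/CQ limits (or the LLQ limit
 * when LLQ placement is used for Tx) and the MSI-X vector count minus the
 * one vector reserved for management.
 */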
3129static int ena_calc_io_queue_num(struct pci_dev *pdev,
3130                                 struct ena_com_dev *ena_dev,
3131                                 struct ena_com_dev_get_features_ctx *get_feat_ctx)
3132{
3133        int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num;
3134
3135        if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3136                struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3137                        &get_feat_ctx->max_queue_ext.max_queue_ext;
3138                io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
3139                                  max_queue_ext->max_rx_cq_num);
3140
3141                io_tx_sq_num = max_queue_ext->max_tx_sq_num;
3142                io_tx_cq_num = max_queue_ext->max_tx_cq_num;
3143        } else {
3144                struct ena_admin_queue_feature_desc *max_queues =
3145                        &get_feat_ctx->max_queues;
3146                io_tx_sq_num = max_queues->max_sq_num;
3147                io_tx_cq_num = max_queues->max_cq_num;
3148                io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
3149        }
3150
3151        /* In case of LLQ use the llq fields for the tx SQ/CQ */
3152        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3153                io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
3154
3155        io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
3156        io_queue_num = min_t(int, io_queue_num, io_rx_num);
3157        io_queue_num = min_t(int, io_queue_num, io_tx_sq_num);
3158        io_queue_num = min_t(int, io_queue_num, io_tx_cq_num);
3159        /* 1 IRQ for mgmnt and 1 IRQ for each IO queue */
3160        io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1);
3161        if (unlikely(!io_queue_num)) {
3162                dev_err(&pdev->dev, "The device doesn't have io queues\n");
3163                return -EFAULT;
3164        }
3165
3166        return io_queue_num;
3167}
3168
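/* Choose between host memory and device memory (LLQ) placement for Tx.
 * Missing LLQ support or a failed device-mode configuration falls back to
 * the host memory policy; only a failure to map the LLQ bar fails the probe.
 */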
3169static int ena_set_queues_placement_policy(struct pci_dev *pdev,
3170                                           struct ena_com_dev *ena_dev,
3171                                           struct ena_admin_feature_llq_desc *llq,
3172                                           struct ena_llq_configurations *llq_default_configurations)
3173{
3174        bool has_mem_bar;
3175        int rc;
3176        u32 llq_feature_mask;
3177
3178        llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3179        if (!(ena_dev->supported_features & llq_feature_mask)) {
3180                dev_err(&pdev->dev,
3181                        "LLQ is not supported. Fallback to host mode policy.\n");
3182                ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3183                return 0;
3184        }
3185
3186        has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
3187
3188        rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3189        if (unlikely(rc)) {
3190                dev_err(&pdev->dev,
3191                        "Failed to configure the device mode. Fallback to host mode policy.\n");
3192                ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3193                return 0;
3194        }
3195
3196        /* Nothing to config, exit */
3197        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
3198                return 0;
3199
3200        if (!has_mem_bar) {
3201                dev_err(&pdev->dev,
3202                        "ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
3203                ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3204                return 0;
3205        }
3206
3207        ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3208                                           pci_resource_start(pdev, ENA_MEM_BAR),
3209                                           pci_resource_len(pdev, ENA_MEM_BAR));
3210
3211        if (!ena_dev->mem_bar)
3212                return -EFAULT;
3213
3214        return 0;
3215}
3216
3217static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
3218                                 struct net_device *netdev)
3219{
3220        netdev_features_t dev_features = 0;
3221
3222        /* Set offload features */
3223        if (feat->offload.tx &
3224                ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
3225                dev_features |= NETIF_F_IP_CSUM;
3226
3227        if (feat->offload.tx &
3228                ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
3229                dev_features |= NETIF_F_IPV6_CSUM;
3230
3231        if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
3232                dev_features |= NETIF_F_TSO;
3233
3234        if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
3235                dev_features |= NETIF_F_TSO6;
3236
3237        if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
3238                dev_features |= NETIF_F_TSO_ECN;
3239
3240        if (feat->offload.rx_supported &
3241                ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
3242                dev_features |= NETIF_F_RXCSUM;
3243
3244        if (feat->offload.rx_supported &
3245                ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
3246                dev_features |= NETIF_F_RXCSUM;
3247
3248        netdev->features =
3249                dev_features |
3250                NETIF_F_SG |
3251                NETIF_F_RXHASH |
3252                NETIF_F_HIGHDMA;
3253
3254        netdev->hw_features |= netdev->features;
3255        netdev->vlan_features |= netdev->features;
3256}
3257
3258static void ena_set_conf_feat_params(struct ena_adapter *adapter,
3259                                     struct ena_com_dev_get_features_ctx *feat)
3260{
3261        struct net_device *netdev = adapter->netdev;
3262
3263        /* Copy mac address */
3264        if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
3265                eth_hw_addr_random(netdev);
3266                ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
3267        } else {
3268                ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
3269                ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
3270        }
3271
3272        /* Set offload features */
3273        ena_set_dev_offloads(feat, netdev);
3274
3275        adapter->max_mtu = feat->dev_attr.max_mtu;
3276        netdev->max_mtu = adapter->max_mtu;
3277        netdev->min_mtu = ENA_MIN_MTU;
3278}
3279
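/* Program a default RSS configuration: an indirection table spread over the
 * IO queues (ethtool_rxfh_indir_default) and a CRC32 hash function.
 * -EOPNOTSUPP replies from the device are not treated as fatal.
 */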
3280static int ena_rss_init_default(struct ena_adapter *adapter)
3281{
3282        struct ena_com_dev *ena_dev = adapter->ena_dev;
3283        struct device *dev = &adapter->pdev->dev;
3284        int rc, i;
3285        u32 val;
3286
3287        rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3288        if (unlikely(rc)) {
3289                dev_err(dev, "Cannot init indirect table\n");
3290                goto err_rss_init;
3291        }
3292
3293        for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3294                val = ethtool_rxfh_indir_default(i, adapter->num_queues);
3295                rc = ena_com_indirect_table_fill_entry(ena_dev, i,
3296                                                       ENA_IO_RXQ_IDX(val));
3297                if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3298                        dev_err(dev, "Cannot fill indirect table\n");
3299                        goto err_fill_indir;
3300                }
3301        }
3302
3303        rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
3304                                        ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
3305        if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3306                dev_err(dev, "Cannot fill hash function\n");
3307                goto err_fill_indir;
3308        }
3309
3310        rc = ena_com_set_default_hash_ctrl(ena_dev);
3311        if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3312                dev_err(dev, "Cannot fill hash control\n");
3313                goto err_fill_indir;
3314        }
3315
3316        return 0;
3317
3318err_fill_indir:
3319        ena_com_rss_destroy(ena_dev);
3320err_rss_init:
3321
3322        return rc;
3323}
3324
3325static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
3326{
3327        int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3328
3329        pci_release_selected_regions(pdev, release_bars);
3330}
3331
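/* Default LLQ layout: headers placed inline, 128B ring entries holding
 * multiple descriptors, with 2 descriptors preceding the packet header.
 */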
3332static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
3333{
3334        llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
3335        llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
3336        llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
3337        llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
3338        llq_config->llq_ring_entry_size_value = 128;
3339}
3340
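/* Derive the Tx/Rx ring sizes: start from ENA_DEFAULT_RING_SIZE, clamp to
 * the device-reported maximums (and to the LLQ depth when LLQ placement is
 * used for Tx) and round everything down to a power of two.
 */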
3341static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx)
3342{
3343        struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
3344        struct ena_com_dev *ena_dev = ctx->ena_dev;
3345        u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
3346        u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
3347        u32 max_tx_queue_size;
3348        u32 max_rx_queue_size;
3349
3350        if (ctx->ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3351                struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3352                        &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
3353                max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
3354                                          max_queue_ext->max_rx_sq_depth);
3355                max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
3356
3357                if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3358                        max_tx_queue_size = min_t(u32, max_tx_queue_size,
3359                                                  llq->max_llq_depth);
3360                else
3361                        max_tx_queue_size = min_t(u32, max_tx_queue_size,
3362                                                  max_queue_ext->max_tx_sq_depth);
3363
3364                ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
3365                                             max_queue_ext->max_per_packet_tx_descs);
3366                ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
3367                                             max_queue_ext->max_per_packet_rx_descs);
3368        } else {
3369                struct ena_admin_queue_feature_desc *max_queues =
3370                        &ctx->get_feat_ctx->max_queues;
3371                max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
3372                                          max_queues->max_sq_depth);
3373                max_tx_queue_size = max_queues->max_cq_depth;
3374
3375                if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3376                        max_tx_queue_size = min_t(u32, max_tx_queue_size,
3377                                                  llq->max_llq_depth);
3378                else
3379                        max_tx_queue_size = min_t(u32, max_tx_queue_size,
3380                                                  max_queues->max_sq_depth);
3381
3382                ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
3383                                             max_queues->max_packet_tx_descs);
3384                ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
3385                                             max_queues->max_packet_rx_descs);
3386        }
3387
3388        max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
3389        max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
3390
3391        tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
3392                                  max_tx_queue_size);
3393        rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
3394                                  max_rx_queue_size);
3395
3396        tx_queue_size = rounddown_pow_of_two(tx_queue_size);
3397        rx_queue_size = rounddown_pow_of_two(rx_queue_size);
3398
3399        ctx->max_tx_queue_size = max_tx_queue_size;
3400        ctx->max_rx_queue_size = max_rx_queue_size;
3401        ctx->tx_queue_size = tx_queue_size;
3402        ctx->rx_queue_size = rx_queue_size;
3403
3404        return 0;
3405}
3406
3407/* ena_probe - Device Initialization Routine
3408 * @pdev: PCI device information struct
3409 * @ent: entry in ena_pci_tbl
3410 *
3411 * Returns 0 on success, negative on failure
3412 *
3413 * ena_probe initializes an adapter identified by a pci_dev structure.
3414 * The OS initialization, configuring of the adapter private structure,
3415 * and a hardware reset occur.
3416 */
3417static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
3418{
3419        struct ena_com_dev_get_features_ctx get_feat_ctx;
3420        struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
3421        struct ena_llq_configurations llq_config;
3422        struct ena_com_dev *ena_dev = NULL;
3423        struct ena_adapter *adapter;
3424        int io_queue_num, bars, rc;
3425        struct net_device *netdev;
3426        static int adapters_found;
3427        char *queue_type_str;
3428        bool wd_state;
3429
3430        dev_dbg(&pdev->dev, "%s\n", __func__);
3431
3432        dev_info_once(&pdev->dev, "%s", version);
3433
3434        rc = pci_enable_device_mem(pdev);
3435        if (rc) {
3436                dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
3437                return rc;
3438        }
3439
3440        pci_set_master(pdev);
3441
3442        ena_dev = vzalloc(sizeof(*ena_dev));
3443        if (!ena_dev) {
3444                rc = -ENOMEM;
3445                goto err_disable_device;
3446        }
3447
3448        bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3449        rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
3450        if (rc) {
3451                dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
3452                        rc);
3453                goto err_free_ena_dev;
3454        }
3455
3456        ena_dev->reg_bar = devm_ioremap(&pdev->dev,
3457                                        pci_resource_start(pdev, ENA_REG_BAR),
3458                                        pci_resource_len(pdev, ENA_REG_BAR));
3459        if (!ena_dev->reg_bar) {
3460                dev_err(&pdev->dev, "failed to remap regs bar\n");
3461                rc = -EFAULT;
3462                goto err_free_region;
3463        }
3464
3465        ena_dev->dmadev = &pdev->dev;
3466
3467        rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
3468        if (rc) {
3469                dev_err(&pdev->dev, "ena device init failed\n");
3470                if (rc == -ETIME)
3471                        rc = -EPROBE_DEFER;
3472                goto err_free_region;
3473        }
3474
3475        set_default_llq_configurations(&llq_config);
3476
3477        rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
3478                                             &llq_config);
3479        if (rc) {
3480                dev_err(&pdev->dev, "ENA LLQ configuration failed\n");
3481                goto err_device_destroy;
3482        }
3483
3484        calc_queue_ctx.ena_dev = ena_dev;
3485        calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
3486        calc_queue_ctx.pdev = pdev;
3487
3488        /* Initial Tx interrupt delay. Assumes 1 usec granularity.
3489         * Updated during device initialization with the real granularity.
3490         */
3491        ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
3492        io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx);
3493        rc = ena_calc_queue_size(&calc_queue_ctx);
3494        if (rc || io_queue_num <= 0) {
3495                rc = -EFAULT;
3496                goto err_device_destroy;
3497        }
3498
3499        dev_info(&pdev->dev, "creating %d io queues. rx queue size: %d tx queue size: %d LLQ is %s\n",
3500                 io_queue_num,
3501                 calc_queue_ctx.rx_queue_size,
3502                 calc_queue_ctx.tx_queue_size,
3503                 (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ?
3504                 "ENABLED" : "DISABLED");
3505
3506        /* dev zeroed in alloc_etherdev_mq */
3507        netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
3508        if (!netdev) {
3509                dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
3510                rc = -ENOMEM;
3511                goto err_device_destroy;
3512        }
3513
3514        SET_NETDEV_DEV(netdev, &pdev->dev);
3515
3516        adapter = netdev_priv(netdev);
3517        pci_set_drvdata(pdev, adapter);
3518
3519        adapter->ena_dev = ena_dev;
3520        adapter->netdev = netdev;
3521        adapter->pdev = pdev;
3522
3523        ena_set_conf_feat_params(adapter, &get_feat_ctx);
3524
3525        adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
3526        adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3527
3528        adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
3529        adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
3530        adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
3531        adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
3532        adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
3533        adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
3534
3535        adapter->num_queues = io_queue_num;
3536        adapter->last_monitored_tx_qid = 0;
3537
3538        adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
3539        adapter->wd_state = wd_state;
3540
3541        snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
3542
3543        rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
3544        if (rc) {
3545                dev_err(&pdev->dev,
3546                        "Failed to query interrupt moderation feature\n");
3547                goto err_netdev_destroy;
3548        }
3549        ena_init_io_rings(adapter);
3550
3551        netdev->netdev_ops = &ena_netdev_ops;
3552        netdev->watchdog_timeo = TX_TIMEOUT;
3553        ena_set_ethtool_ops(netdev);
3554
3555        netdev->priv_flags |= IFF_UNICAST_FLT;
3556
3557        u64_stats_init(&adapter->syncp);
3558
3559        rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
3560        if (rc) {
3561                dev_err(&pdev->dev,
3562                        "Failed to enable and set the admin interrupts\n");
3563                goto err_worker_destroy;
3564        }
3565        rc = ena_rss_init_default(adapter);
3566        if (rc && (rc != -EOPNOTSUPP)) {
3567                dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
3568                goto err_free_msix;
3569        }
3570
3571        ena_config_debug_area(adapter);
3572
3573        memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
3574
3575        netif_carrier_off(netdev);
3576
3577        rc = register_netdev(netdev);
3578        if (rc) {
3579                dev_err(&pdev->dev, "Cannot register net device\n");
3580                goto err_rss;
3581        }
3582
3583        INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
3584
3585        adapter->last_keep_alive_jiffies = jiffies;
3586        adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
3587        adapter->missing_tx_completion_to = TX_TIMEOUT;
3588        adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
3589
3590        ena_update_hints(adapter, &get_feat_ctx.hw_hints);
3591
3592        timer_setup(&adapter->timer_service, ena_timer_service, 0);
3593        mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3594
3595        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
3596                queue_type_str = "Regular";
3597        else
3598                queue_type_str = "Low Latency";
3599
3600        dev_info(&pdev->dev,
3601                 "%s found at mem %lx, mac addr %pM Queues %d, Placement policy: %s\n",
3602                 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
3603                 netdev->dev_addr, io_queue_num, queue_type_str);
3604
3605        set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3606
3607        adapters_found++;
3608
3609        return 0;
3610
3611err_rss:
3612        ena_com_delete_debug_area(ena_dev);
3613        ena_com_rss_destroy(ena_dev);
3614err_free_msix:
3615        ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
3616        /* stop submitting admin commands on a device that was reset */
3617        ena_com_set_admin_running_state(ena_dev, false);
3618        ena_free_mgmnt_irq(adapter);
3619        ena_disable_msix(adapter);
3620err_worker_destroy:
3621        ena_com_destroy_interrupt_moderation(ena_dev);
3622        del_timer(&adapter->timer_service);
3623err_netdev_destroy:
3624        free_netdev(netdev);
3625err_device_destroy:
3626        ena_com_delete_host_info(ena_dev);
3627        ena_com_admin_destroy(ena_dev);
3628err_free_region:
3629        ena_release_bars(ena_dev, pdev);
3630err_free_ena_dev:
3631        vfree(ena_dev);
3632err_disable_device:
3633        pci_disable_device(pdev);
3634        return rc;
3635}
3636
3637/*****************************************************************************/
3638
3639/* ena_remove - Device Removal Routine
3640 * @pdev: PCI device information struct
3641 *
3642 * ena_remove is called by the PCI subsystem to alert the driver
3643 * that it should release a PCI device.
3644 */
3645static void ena_remove(struct pci_dev *pdev)
3646{
3647        struct ena_adapter *adapter = pci_get_drvdata(pdev);
3648        struct ena_com_dev *ena_dev;
3649        struct net_device *netdev;
3650
3651        ena_dev = adapter->ena_dev;
3652        netdev = adapter->netdev;
3653
3654#ifdef CONFIG_RFS_ACCEL
3655        if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
3656                free_irq_cpu_rmap(netdev->rx_cpu_rmap);
3657                netdev->rx_cpu_rmap = NULL;
3658        }
3659#endif /* CONFIG_RFS_ACCEL */
3660        del_timer_sync(&adapter->timer_service);
3661
3662        cancel_work_sync(&adapter->reset_task);
3663
3664        rtnl_lock();
3665        ena_destroy_device(adapter, true);
3666        rtnl_unlock();
3667
3668        unregister_netdev(netdev);
3669
3670        free_netdev(netdev);
3671
3672        ena_com_rss_destroy(ena_dev);
3673
3674        ena_com_delete_debug_area(ena_dev);
3675
3676        ena_com_delete_host_info(ena_dev);
3677
3678        ena_release_bars(ena_dev, pdev);
3679
3680        pci_disable_device(pdev);
3681
3682        ena_com_destroy_interrupt_moderation(ena_dev);
3683
3684        vfree(ena_dev);
3685}
3686
3687#ifdef CONFIG_PM
3688/* ena_suspend - PM suspend callback
3689 * @pdev: PCI device information struct
3690 * @state: power state
3691 */
3692static int ena_suspend(struct pci_dev *pdev, pm_message_t state)
3693{
3694        struct ena_adapter *adapter = pci_get_drvdata(pdev);
3695
3696        u64_stats_update_begin(&adapter->syncp);
3697        adapter->dev_stats.suspend++;
3698        u64_stats_update_end(&adapter->syncp);
3699
3700        rtnl_lock();
3701        if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3702                dev_err(&pdev->dev,
3703                        "ignoring device reset request as the device is being suspended\n");
3704                clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3705        }
3706        ena_destroy_device(adapter, true);
3707        rtnl_unlock();
3708        return 0;
3709}
3710
3711/* ena_resume - PM resume callback
3712 * @pdev: PCI device information struct
3713 *
3714 */
3715static int ena_resume(struct pci_dev *pdev)
3716{
3717        struct ena_adapter *adapter = pci_get_drvdata(pdev);
3718        int rc;
3719
3720        u64_stats_update_begin(&adapter->syncp);
3721        adapter->dev_stats.resume++;
3722        u64_stats_update_end(&adapter->syncp);
3723
3724        rtnl_lock();
3725        rc = ena_restore_device(adapter);
3726        rtnl_unlock();
3727        return rc;
3728}
3729#endif
3730
3731static struct pci_driver ena_pci_driver = {
3732        .name           = DRV_MODULE_NAME,
3733        .id_table       = ena_pci_tbl,
3734        .probe          = ena_probe,
3735        .remove         = ena_remove,
3736#ifdef CONFIG_PM
3737        .suspend    = ena_suspend,
3738        .resume     = ena_resume,
3739#endif
3740        .sriov_configure = pci_sriov_configure_simple,
3741};
3742
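/* Module init: create the single-threaded workqueue used for the reset task
 * before registering the PCI driver, so the reset path always has a
 * workqueue to run on.
 */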
3743static int __init ena_init(void)
3744{
3745        pr_info("%s", version);
3746
3747        ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
3748        if (!ena_wq) {
3749                pr_err("Failed to create workqueue\n");
3750                return -ENOMEM;
3751        }
3752
3753        return pci_register_driver(&ena_pci_driver);
3754}
3755
3756static void __exit ena_cleanup(void)
3757{
3758        pci_unregister_driver(&ena_pci_driver);
3759
3760        if (ena_wq) {
3761                destroy_workqueue(ena_wq);
3762                ena_wq = NULL;
3763        }
3764}
3765
3766/******************************************************************************
3767 ******************************** AENQ Handlers *******************************
3768 *****************************************************************************/
3769/* ena_update_on_link_change:
3770 * Notify the network interface about the change in link status
3771 */
3772static void ena_update_on_link_change(void *adapter_data,
3773                                      struct ena_admin_aenq_entry *aenq_e)
3774{
3775        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3776        struct ena_admin_aenq_link_change_desc *aenq_desc =
3777                (struct ena_admin_aenq_link_change_desc *)aenq_e;
3778        int status = aenq_desc->flags &
3779                ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3780
3781        if (status) {
3782                netdev_dbg(adapter->netdev, "%s\n", __func__);
3783                set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
3784                if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
3785                        netif_carrier_on(adapter->netdev);
3786        } else {
3787                clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
3788                netif_carrier_off(adapter->netdev);
3789        }
3790}
3791
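/* Keep-alive AENQ handler: refresh the watchdog timestamp and record the
 * rx-drop counter reported by the device (64 bits split across two fields).
 */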
3792static void ena_keep_alive_wd(void *adapter_data,
3793                              struct ena_admin_aenq_entry *aenq_e)
3794{
3795        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3796        struct ena_admin_aenq_keep_alive_desc *desc;
3797        u64 rx_drops;
3798
3799        desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
3800        adapter->last_keep_alive_jiffies = jiffies;
3801
3802        rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
3803
3804        u64_stats_update_begin(&adapter->syncp);
3805        adapter->dev_stats.rx_drops = rx_drops;
3806        u64_stats_update_end(&adapter->syncp);
3807}
3808
3809static void ena_notification(void *adapter_data,
3810                             struct ena_admin_aenq_entry *aenq_e)
3811{
3812        struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3813        struct ena_admin_ena_hw_hints *hints;
3814
3815        WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
3816             "Invalid group(%x) expected %x\n",
3817             aenq_e->aenq_common_desc.group,
3818             ENA_ADMIN_NOTIFICATION);
3819
3820        switch (aenq_e->aenq_common_desc.syndrom) {
3821        case ENA_ADMIN_UPDATE_HINTS:
3822                hints = (struct ena_admin_ena_hw_hints *)
3823                        (&aenq_e->inline_data_w4);
3824                ena_update_hints(adapter, hints);
3825                break;
3826        default:
3827                netif_err(adapter, drv, adapter->netdev,
3828                          "Invalid aenq notification syndrome %d\n",
3829                          aenq_e->aenq_common_desc.syndrom);
3830        }
3831}
3832
3833/* This handler will be called for an unknown event group or unimplemented handlers */
3834static void unimplemented_aenq_handler(void *data,
3835                                       struct ena_admin_aenq_entry *aenq_e)
3836{
3837        struct ena_adapter *adapter = (struct ena_adapter *)data;
3838
3839        netif_err(adapter, drv, adapter->netdev,
3840                  "Unknown event was received or event with unimplemented handler\n");
3841}
3842
3843static struct ena_aenq_handlers aenq_handlers = {
3844        .handlers = {
3845                [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
3846                [ENA_ADMIN_NOTIFICATION] = ena_notification,
3847                [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
3848        },
3849        .unimplemented_handler = unimplemented_aenq_handler
3850};
3851
3852module_init(ena_init);
3853module_exit(ena_cleanup);
3854