linux/drivers/net/ethernet/google/gve/gve_main.c
   1// SPDX-License-Identifier: (GPL-2.0 OR MIT)
   2/* Google virtual Ethernet (gve) driver
   3 *
   4 * Copyright (C) 2015-2019 Google, Inc.
   5 */
   6
   7#include <linux/cpumask.h>
   8#include <linux/etherdevice.h>
   9#include <linux/interrupt.h>
  10#include <linux/module.h>
  11#include <linux/pci.h>
  12#include <linux/sched.h>
  13#include <linux/timer.h>
  14#include <linux/workqueue.h>
  15#include <net/sch_generic.h>
  16#include "gve.h"
  17#include "gve_adminq.h"
  18#include "gve_register.h"
  19
  20#define GVE_DEFAULT_RX_COPYBREAK        (256)
  21
  22#define DEFAULT_MSG_LEVEL       (NETIF_MSG_DRV | NETIF_MSG_LINK)
  23#define GVE_VERSION             "1.0.0"
  24#define GVE_VERSION_PREFIX      "GVE-"
  25
  26const char gve_version_str[] = GVE_VERSION;
  27static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
  28
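/* ndo_get_stats64 handler: fold each RX/TX ring's packet and byte
 * counters into @s. Every ring is read under its u64_stats seqcount so
 * torn 64-bit reads on 32-bit hosts are retried.
 */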
  29static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
  30{
  31        struct gve_priv *priv = netdev_priv(dev);
  32        unsigned int start;
  33        int ring;
  34
  35        if (priv->rx) {
  36                for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
  37                        do {
  38                                start =
  39                                  u64_stats_fetch_begin(&priv->rx[ring].statss);
  40                                s->rx_packets += priv->rx[ring].rpackets;
  41                                s->rx_bytes += priv->rx[ring].rbytes;
  42                        } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
  43                                                       start));
  44                }
  45        }
  46        if (priv->tx) {
  47                for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
  48                        do {
  49                                start =
  50                                  u64_stats_fetch_begin(&priv->tx[ring].statss);
  51                                s->tx_packets += priv->tx[ring].pkt_done;
  52                                s->tx_bytes += priv->tx[ring].bytes_done;
  53                        } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
  54                                                       start));
  55                }
  56        }
  57}
  58
  59static int gve_alloc_counter_array(struct gve_priv *priv)
  60{
  61        priv->counter_array =
  62                dma_alloc_coherent(&priv->pdev->dev,
  63                                   priv->num_event_counters *
  64                                   sizeof(*priv->counter_array),
  65                                   &priv->counter_array_bus, GFP_KERNEL);
  66        if (!priv->counter_array)
  67                return -ENOMEM;
  68
  69        return 0;
  70}
  71
  72static void gve_free_counter_array(struct gve_priv *priv)
  73{
  74        dma_free_coherent(&priv->pdev->dev,
  75                          priv->num_event_counters *
  76                          sizeof(*priv->counter_array),
  77                          priv->counter_array, priv->counter_array_bus);
  78        priv->counter_array = NULL;
  79}
  80
  81static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
  82{
  83        struct gve_priv *priv = arg;
  84
  85        queue_work(priv->gve_wq, &priv->service_task);
  86        return IRQ_HANDLED;
  87}
  88
  89static irqreturn_t gve_intr(int irq, void *arg)
  90{
  91        struct gve_notify_block *block = arg;
  92        struct gve_priv *priv = block->priv;
  93
  94        iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
  95        napi_schedule_irqoff(&block->napi);
  96        return IRQ_HANDLED;
  97}
  98
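/* NAPI poll for one notification block. TX and/or RX work is serviced
 * up to @budget; returning the full budget keeps the block in polling
 * mode. Once idle, NAPI is completed and the IRQ doorbell is acked and
 * unmasked, then work is checked one more time so an event racing with
 * the unmask is not lost; if anything slipped in, NAPI is rescheduled
 * and the IRQ re-masked.
 */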
  99static int gve_napi_poll(struct napi_struct *napi, int budget)
 100{
 101        struct gve_notify_block *block;
 102        __be32 __iomem *irq_doorbell;
 103        bool reschedule = false;
 104        struct gve_priv *priv;
 105
 106        block = container_of(napi, struct gve_notify_block, napi);
 107        priv = block->priv;
 108
 109        if (block->tx)
 110                reschedule |= gve_tx_poll(block, budget);
 111        if (block->rx)
 112                reschedule |= gve_rx_poll(block, budget);
 113
 114        if (reschedule)
 115                return budget;
 116
 117        napi_complete(napi);
 118        irq_doorbell = gve_irq_doorbell(priv, block);
 119        iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
 120
 121        /* Double check we have no extra work.
 122         * Ensure unmask synchronizes with checking for work.
 123         */
 124        dma_rmb();
 125        if (block->tx)
 126                reschedule |= gve_tx_poll(block, -1);
 127        if (block->rx)
 128                reschedule |= gve_rx_poll(block, -1);
 129        if (reschedule && napi_reschedule(napi))
 130                iowrite32be(GVE_IRQ_MASK, irq_doorbell);
 131
 132        return 0;
 133}
 134
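/* Set up MSI-X: one vector per notification block plus a final
 * management vector. If the PCI core grants fewer vectors than
 * requested, the notification block count and the TX/RX max queue
 * limits are scaled down to fit. Block IRQs get affinity hints spread
 * across the online CPUs.
 */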
 135static int gve_alloc_notify_blocks(struct gve_priv *priv)
 136{
 137        int num_vecs_requested = priv->num_ntfy_blks + 1;
 138        char *name = priv->dev->name;
 139        unsigned int active_cpus;
 140        int vecs_enabled;
 141        int i, j;
 142        int err;
 143
 144        priv->msix_vectors = kvzalloc(num_vecs_requested *
 145                                      sizeof(*priv->msix_vectors), GFP_KERNEL);
 146        if (!priv->msix_vectors)
 147                return -ENOMEM;
 148        for (i = 0; i < num_vecs_requested; i++)
 149                priv->msix_vectors[i].entry = i;
 150        vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
 151                                             GVE_MIN_MSIX, num_vecs_requested);
 152        if (vecs_enabled < 0) {
 153                dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
 154                        GVE_MIN_MSIX, vecs_enabled);
 155                err = vecs_enabled;
 156                goto abort_with_msix_vectors;
 157        }
 158        if (vecs_enabled != num_vecs_requested) {
 159                int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
 160                int vecs_per_type = new_num_ntfy_blks / 2;
 161                int vecs_left = new_num_ntfy_blks % 2;
 162
 163                priv->num_ntfy_blks = new_num_ntfy_blks;
 164                priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
 165                                                vecs_per_type);
 166                priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
 167                                                vecs_per_type + vecs_left);
 168                dev_err(&priv->pdev->dev,
 169                        "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
 170                        vecs_enabled, priv->tx_cfg.max_queues,
 171                        priv->rx_cfg.max_queues);
 172                if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
 173                        priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
 174                if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
 175                        priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
 176        }
 177        /* Half the notification blocks go to TX and half to RX */
 178        active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
 179
 180        /* Setup Management Vector  - the last vector */
 181        snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
 182                 name);
 183        err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
 184                          gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
 185        if (err) {
 186                dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
 187                goto abort_with_msix_enabled;
 188        }
 189        priv->ntfy_blocks =
 190                dma_alloc_coherent(&priv->pdev->dev,
 191                                   priv->num_ntfy_blks *
 192                                   sizeof(*priv->ntfy_blocks),
 193                                   &priv->ntfy_block_bus, GFP_KERNEL);
 194        if (!priv->ntfy_blocks) {
 195                err = -ENOMEM;
 196                goto abort_with_mgmt_vector;
 197        }
 198        /* Setup the other blocks - the first n-1 vectors */
 199        for (i = 0; i < priv->num_ntfy_blks; i++) {
 200                struct gve_notify_block *block = &priv->ntfy_blocks[i];
 201                int msix_idx = i;
 202
 203                snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
 204                         name, i);
 205                block->priv = priv;
 206                err = request_irq(priv->msix_vectors[msix_idx].vector,
 207                                  gve_intr, 0, block->name, block);
 208                if (err) {
 209                        dev_err(&priv->pdev->dev,
 210                                "Failed to receive msix vector %d\n", i);
 211                        goto abort_with_some_ntfy_blocks;
 212                }
 213                irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 214                                      get_cpu_mask(i % active_cpus));
 215        }
 216        return 0;
 217abort_with_some_ntfy_blocks:
 218        for (j = 0; j < i; j++) {
 219                struct gve_notify_block *block = &priv->ntfy_blocks[j];
 220                int msix_idx = j;
 221
 222                irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 223                                      NULL);
 224                free_irq(priv->msix_vectors[msix_idx].vector, block);
 225        }
 226        dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
 227                          sizeof(*priv->ntfy_blocks),
 228                          priv->ntfy_blocks, priv->ntfy_block_bus);
 229        priv->ntfy_blocks = NULL;
 230abort_with_mgmt_vector:
 231        free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 232abort_with_msix_enabled:
 233        pci_disable_msix(priv->pdev);
 234abort_with_msix_vectors:
 235        kvfree(priv->msix_vectors);
 236        priv->msix_vectors = NULL;
 237        return err;
 238}
 239
 240static void gve_free_notify_blocks(struct gve_priv *priv)
 241{
 242        int i;
 243
 244        /* Free the irqs */
 245        for (i = 0; i < priv->num_ntfy_blks; i++) {
 246                struct gve_notify_block *block = &priv->ntfy_blocks[i];
 247                int msix_idx = i;
 248
 249                irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 250                                      NULL);
 251                free_irq(priv->msix_vectors[msix_idx].vector, block);
 252        }
 253        dma_free_coherent(&priv->pdev->dev,
 254                          priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
 255                          priv->ntfy_blocks, priv->ntfy_block_bus);
 256        priv->ntfy_blocks = NULL;
 257        free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 258        pci_disable_msix(priv->pdev);
 259        kvfree(priv->msix_vectors);
 260        priv->msix_vectors = NULL;
 261}
 262
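/* Allocate the event counter array and the notification blocks, then
 * hand their DMA addresses to the device over the admin queue.
 */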
 263static int gve_setup_device_resources(struct gve_priv *priv)
 264{
 265        int err;
 266
 267        err = gve_alloc_counter_array(priv);
 268        if (err)
 269                return err;
 270        err = gve_alloc_notify_blocks(priv);
 271        if (err)
 272                goto abort_with_counter;
 273        err = gve_adminq_configure_device_resources(priv,
 274                                                    priv->counter_array_bus,
 275                                                    priv->num_event_counters,
 276                                                    priv->ntfy_block_bus,
 277                                                    priv->num_ntfy_blks);
 278        if (unlikely(err)) {
 279                dev_err(&priv->pdev->dev,
 280                        "could not setup device_resources: err=%d\n", err);
 281                err = -ENXIO;
 282                goto abort_with_ntfy_blocks;
 283        }
 284        gve_set_device_resources_ok(priv);
 285        return 0;
 286abort_with_ntfy_blocks:
 287        gve_free_notify_blocks(priv);
 288abort_with_counter:
 289        gve_free_counter_array(priv);
 290        return err;
 291}
 292
 293static void gve_trigger_reset(struct gve_priv *priv);
 294
 295static void gve_teardown_device_resources(struct gve_priv *priv)
 296{
 297        int err;
 298
 299        /* Tell device its resources are being freed */
 300        if (gve_get_device_resources_ok(priv)) {
 301                err = gve_adminq_deconfigure_device_resources(priv);
 302                if (err) {
 303                        dev_err(&priv->pdev->dev,
 304                                "Could not deconfigure device resources: err=%d\n",
 305                                err);
 306                        gve_trigger_reset(priv);
 307                }
 308        }
 309        gve_free_counter_array(priv);
 310        gve_free_notify_blocks(priv);
 311        gve_clear_device_resources_ok(priv);
 312}
 313
 314static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
 315{
 316        struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 317
 318        netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
 319                       NAPI_POLL_WEIGHT);
 320}
 321
 322static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
 323{
 324        struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 325
 326        netif_napi_del(&block->napi);
 327}
 328
 329static int gve_register_qpls(struct gve_priv *priv)
 330{
 331        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
 332        int err;
 333        int i;
 334
 335        for (i = 0; i < num_qpls; i++) {
 336                err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
 337                if (err) {
 338                        netif_err(priv, drv, priv->dev,
 339                                  "failed to register queue page list %d\n",
 340                                  priv->qpls[i].id);
 341                        /* This failure will trigger a reset - no need to clean
 342                         * up
 343                         */
 344                        return err;
 345                }
 346        }
 347        return 0;
 348}
 349
 350static int gve_unregister_qpls(struct gve_priv *priv)
 351{
 352        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
 353        int err;
 354        int i;
 355
 356        for (i = 0; i < num_qpls; i++) {
 357                err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
 358                /* This failure will trigger a reset - no need to clean up */
 359                if (err) {
 360                        netif_err(priv, drv, priv->dev,
 361                                  "Failed to unregister queue page list %d\n",
 362                                  priv->qpls[i].id);
 363                        return err;
 364                }
 365        }
 366        return 0;
 367}
 368
 369static int gve_create_rings(struct gve_priv *priv)
 370{
 371        int err;
 372        int i;
 373
 374        for (i = 0; i < priv->tx_cfg.num_queues; i++) {
 375                err = gve_adminq_create_tx_queue(priv, i);
 376                if (err) {
 377                        netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
 378                                  i);
 379                        /* This failure will trigger a reset - no need to clean
 380                         * up
 381                         */
 382                        return err;
 383                }
 384                netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
 385        }
 386        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
 387                err = gve_adminq_create_rx_queue(priv, i);
 388                if (err) {
 389                        netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
 390                                  i);
 391                        /* This failure will trigger a reset - no need to clean
 392                         * up
 393                         */
 394                        return err;
 395                }
 396                /* Rx data ring has been prefilled with packet buffers at
 397                 * queue allocation time.
 398                 * Write the doorbell to provide descriptor slots and packet
 399                 * buffers to the NIC.
 400                 */
 401                gve_rx_write_doorbell(priv, &priv->rx[i]);
 402                netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
 403        }
 404
 405        return 0;
 406}
 407
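/* Allocate the TX and RX ring arrays and their per-ring resources,
 * then init each ring's stats seqcount and attach its NAPI instance.
 */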
 408static int gve_alloc_rings(struct gve_priv *priv)
 409{
 410        int ntfy_idx;
 411        int err;
 412        int i;
 413
 414        /* Setup tx rings */
 415        priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
 416                            GFP_KERNEL);
 417        if (!priv->tx)
 418                return -ENOMEM;
 419        err = gve_tx_alloc_rings(priv);
 420        if (err)
 421                goto free_tx;
 422        /* Setup rx rings */
 423        priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
 424                            GFP_KERNEL);
 425        if (!priv->rx) {
 426                err = -ENOMEM;
 427                goto free_tx_queue;
 428        }
 429        err = gve_rx_alloc_rings(priv);
 430        if (err)
 431                goto free_rx;
  432        /* Add tx napi & init sync stats */
 433        for (i = 0; i < priv->tx_cfg.num_queues; i++) {
 434                u64_stats_init(&priv->tx[i].statss);
 435                ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
 436                gve_add_napi(priv, ntfy_idx);
 437        }
  438        /* Add rx napi & init sync stats */
 439        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
 440                u64_stats_init(&priv->rx[i].statss);
 441                ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
 442                gve_add_napi(priv, ntfy_idx);
 443        }
 444
 445        return 0;
 446
 447free_rx:
 448        kvfree(priv->rx);
 449        priv->rx = NULL;
 450free_tx_queue:
 451        gve_tx_free_rings(priv);
 452free_tx:
 453        kvfree(priv->tx);
 454        priv->tx = NULL;
 455        return err;
 456}
 457
 458static int gve_destroy_rings(struct gve_priv *priv)
 459{
 460        int err;
 461        int i;
 462
 463        for (i = 0; i < priv->tx_cfg.num_queues; i++) {
 464                err = gve_adminq_destroy_tx_queue(priv, i);
 465                if (err) {
 466                        netif_err(priv, drv, priv->dev,
 467                                  "failed to destroy tx queue %d\n",
 468                                  i);
 469                        /* This failure will trigger a reset - no need to clean
 470                         * up
 471                         */
 472                        return err;
 473                }
 474                netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
 475        }
 476        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
 477                err = gve_adminq_destroy_rx_queue(priv, i);
 478                if (err) {
 479                        netif_err(priv, drv, priv->dev,
 480                                  "failed to destroy rx queue %d\n",
 481                                  i);
 482                        /* This failure will trigger a reset - no need to clean
 483                         * up
 484                         */
 485                        return err;
 486                }
 487                netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
 488        }
 489        return 0;
 490}
 491
 492static void gve_free_rings(struct gve_priv *priv)
 493{
 494        int ntfy_idx;
 495        int i;
 496
 497        if (priv->tx) {
 498                for (i = 0; i < priv->tx_cfg.num_queues; i++) {
 499                        ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
 500                        gve_remove_napi(priv, ntfy_idx);
 501                }
 502                gve_tx_free_rings(priv);
 503                kvfree(priv->tx);
 504                priv->tx = NULL;
 505        }
 506        if (priv->rx) {
 507                for (i = 0; i < priv->rx_cfg.num_queues; i++) {
 508                        ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
 509                        gve_remove_napi(priv, ntfy_idx);
 510                }
 511                gve_rx_free_rings(priv);
 512                kvfree(priv->rx);
 513                priv->rx = NULL;
 514        }
 515}
 516
 517int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
 518                   enum dma_data_direction dir)
 519{
 520        *page = alloc_page(GFP_KERNEL);
 521        if (!*page)
 522                return -ENOMEM;
 523        *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
 524        if (dma_mapping_error(dev, *dma)) {
 525                put_page(*page);
 526                return -ENOMEM;
 527        }
 528        return 0;
 529}
 530
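/* Allocate one queue page list of @pages pages, each DMA-mapped in the
 * direction this QPL is used for, subject to the device's registered
 * page limit. On failure the caller unwinds the partial allocation.
 */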
 531static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
 532                                     int pages)
 533{
 534        struct gve_queue_page_list *qpl = &priv->qpls[id];
 535        int err;
 536        int i;
 537
 538        if (pages + priv->num_registered_pages > priv->max_registered_pages) {
 539                netif_err(priv, drv, priv->dev,
 540                          "Reached max number of registered pages %llu > %llu\n",
 541                          pages + priv->num_registered_pages,
 542                          priv->max_registered_pages);
 543                return -EINVAL;
 544        }
 545
 546        qpl->id = id;
 547        qpl->num_entries = pages;
 548        qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
 549        /* caller handles clean up */
 550        if (!qpl->pages)
 551                return -ENOMEM;
 552        qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
 553                                   GFP_KERNEL);
 554        /* caller handles clean up */
 555        if (!qpl->page_buses)
 556                return -ENOMEM;
 557
 558        for (i = 0; i < pages; i++) {
 559                err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i],
 560                                     &qpl->page_buses[i],
 561                                     gve_qpl_dma_dir(priv, id));
 562                /* caller handles clean up */
 563                if (err)
 564                        return -ENOMEM;
 565        }
 566        priv->num_registered_pages += pages;
 567
 568        return 0;
 569}
 570
 571void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
 572                   enum dma_data_direction dir)
 573{
 574        if (!dma_mapping_error(dev, dma))
 575                dma_unmap_page(dev, dma, PAGE_SIZE, dir);
 576        if (page)
 577                put_page(page);
 578}
 579
 580static void gve_free_queue_page_list(struct gve_priv *priv,
 581                                     int id)
 582{
 583        struct gve_queue_page_list *qpl = &priv->qpls[id];
 584        int i;
 585
 586        if (!qpl->pages)
 587                return;
 588        if (!qpl->page_buses)
 589                goto free_pages;
 590
 591        for (i = 0; i < qpl->num_entries; i++)
 592                gve_free_page(&priv->pdev->dev, qpl->pages[i],
 593                              qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
 594
 595        kvfree(qpl->page_buses);
 596free_pages:
 597        kvfree(qpl->pages);
 598        priv->num_registered_pages -= qpl->num_entries;
 599}
 600
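/* Allocate all queue page lists (TX QPLs first, then RX) plus the
 * bitmap used to hand QPL IDs out to queues.
 */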
 601static int gve_alloc_qpls(struct gve_priv *priv)
 602{
 603        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
 604        int i, j;
 605        int err;
 606
 607        priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
 608        if (!priv->qpls)
 609                return -ENOMEM;
 610
 611        for (i = 0; i < gve_num_tx_qpls(priv); i++) {
 612                err = gve_alloc_queue_page_list(priv, i,
 613                                                priv->tx_pages_per_qpl);
 614                if (err)
 615                        goto free_qpls;
 616        }
 617        for (; i < num_qpls; i++) {
 618                err = gve_alloc_queue_page_list(priv, i,
 619                                                priv->rx_pages_per_qpl);
 620                if (err)
 621                        goto free_qpls;
 622        }
 623
 624        priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
 625                                     sizeof(unsigned long) * BITS_PER_BYTE;
 626        priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
 627                                            sizeof(unsigned long), GFP_KERNEL);
 628        if (!priv->qpl_cfg.qpl_id_map) {
 629                err = -ENOMEM;
 630                goto free_qpls;
 631        }
 632
 633        return 0;
 634
 635free_qpls:
  636        for (j = 0; j <= i && j < num_qpls; j++)
 637                gve_free_queue_page_list(priv, j);
 638        kvfree(priv->qpls);
 639        return err;
 640}
 641
 642static void gve_free_qpls(struct gve_priv *priv)
 643{
 644        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
 645        int i;
 646
 647        kvfree(priv->qpl_cfg.qpl_id_map);
 648
 649        for (i = 0; i < num_qpls; i++)
 650                gve_free_queue_page_list(priv, i);
 651
 652        kvfree(priv->qpls);
 653}
 654
 655/* Use this to schedule a reset when the device is capable of continuing
 656 * to handle other requests in its current state. If it is not, do a reset
 657 * in thread instead.
 658 */
 659void gve_schedule_reset(struct gve_priv *priv)
 660{
 661        gve_set_do_reset(priv);
 662        queue_work(priv->gve_wq, &priv->service_task);
 663}
 664
 665static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
 666static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
 667static void gve_turndown(struct gve_priv *priv);
 668static void gve_turnup(struct gve_priv *priv);
 669
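/* ndo_open: allocate QPLs and rings, set the real queue counts,
 * register the QPLs and create the queues on the device, then enable
 * NAPI/IRQs and carrier. Failures after the device has been touched
 * fall back to a reset instead of a plain unwind.
 */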
 670static int gve_open(struct net_device *dev)
 671{
 672        struct gve_priv *priv = netdev_priv(dev);
 673        int err;
 674
 675        err = gve_alloc_qpls(priv);
 676        if (err)
 677                return err;
 678        err = gve_alloc_rings(priv);
 679        if (err)
 680                goto free_qpls;
 681
 682        err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
 683        if (err)
 684                goto free_rings;
 685        err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
 686        if (err)
 687                goto free_rings;
 688
 689        err = gve_register_qpls(priv);
 690        if (err)
 691                goto reset;
 692        err = gve_create_rings(priv);
 693        if (err)
 694                goto reset;
 695        gve_set_device_rings_ok(priv);
 696
 697        gve_turnup(priv);
 698        netif_carrier_on(dev);
 699        return 0;
 700
 701free_rings:
 702        gve_free_rings(priv);
 703free_qpls:
 704        gve_free_qpls(priv);
 705        return err;
 706
 707reset:
 708        /* This must have been called from a reset due to the rtnl lock
 709         * so just return at this point.
 710         */
 711        if (gve_get_reset_in_progress(priv))
 712                return err;
 713        /* Otherwise reset before returning */
 714        gve_reset_and_teardown(priv, true);
 715        /* if this fails there is nothing we can do so just ignore the return */
 716        gve_reset_recovery(priv, false);
 717        /* return the original error */
 718        return err;
 719}
 720
 721static int gve_close(struct net_device *dev)
 722{
 723        struct gve_priv *priv = netdev_priv(dev);
 724        int err;
 725
 726        netif_carrier_off(dev);
 727        if (gve_get_device_rings_ok(priv)) {
 728                gve_turndown(priv);
 729                err = gve_destroy_rings(priv);
 730                if (err)
 731                        goto err;
 732                err = gve_unregister_qpls(priv);
 733                if (err)
 734                        goto err;
 735                gve_clear_device_rings_ok(priv);
 736        }
 737
 738        gve_free_rings(priv);
 739        gve_free_qpls(priv);
 740        return 0;
 741
 742err:
 743        /* This must have been called from a reset due to the rtnl lock
 744         * so just return at this point.
 745         */
 746        if (gve_get_reset_in_progress(priv))
 747                return err;
 748        /* Otherwise reset before returning */
 749        gve_reset_and_teardown(priv, true);
 750        return gve_reset_recovery(priv, false);
 751}
 752
 753int gve_adjust_queues(struct gve_priv *priv,
 754                      struct gve_queue_config new_rx_config,
 755                      struct gve_queue_config new_tx_config)
 756{
 757        int err;
 758
 759        if (netif_carrier_ok(priv->dev)) {
 760                /* To make this process as simple as possible we teardown the
 761                 * device, set the new configuration, and then bring the device
 762                 * up again.
 763                 */
 764                err = gve_close(priv->dev);
 765                /* we have already tried to reset in close,
 766                 * just fail at this point
 767                 */
 768                if (err)
 769                        return err;
 770                priv->tx_cfg = new_tx_config;
 771                priv->rx_cfg = new_rx_config;
 772
 773                err = gve_open(priv->dev);
 774                if (err)
 775                        goto err;
 776
 777                return 0;
 778        }
 779        /* Set the config for the next up. */
 780        priv->tx_cfg = new_tx_config;
 781        priv->rx_cfg = new_rx_config;
 782
 783        return 0;
 784err:
 785        netif_err(priv, drv, priv->dev,
 786                  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
 787        gve_turndown(priv);
 788        return err;
 789}
 790
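/* Quiesce the data path: drop carrier, disable NAPI on every TX/RX
 * notification block and stop the TX queues. gve_turnup() below does
 * the reverse and also unmasks each block's IRQ doorbell.
 */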
 791static void gve_turndown(struct gve_priv *priv)
 792{
 793        int idx;
 794
 795        if (netif_carrier_ok(priv->dev))
 796                netif_carrier_off(priv->dev);
 797
 798        if (!gve_get_napi_enabled(priv))
 799                return;
 800
 801        /* Disable napi to prevent more work from coming in */
 802        for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
 803                int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
 804                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 805
 806                napi_disable(&block->napi);
 807        }
 808        for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
 809                int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
 810                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 811
 812                napi_disable(&block->napi);
 813        }
 814
 815        /* Stop tx queues */
 816        netif_tx_disable(priv->dev);
 817
 818        gve_clear_napi_enabled(priv);
 819}
 820
 821static void gve_turnup(struct gve_priv *priv)
 822{
 823        int idx;
 824
 825        /* Start the tx queues */
 826        netif_tx_start_all_queues(priv->dev);
 827
 828        /* Enable napi and unmask interrupts for all queues */
 829        for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
 830                int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
 831                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 832
 833                napi_enable(&block->napi);
 834                iowrite32be(0, gve_irq_doorbell(priv, block));
 835        }
 836        for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
 837                int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
 838                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 839
 840                napi_enable(&block->napi);
 841                iowrite32be(0, gve_irq_doorbell(priv, block));
 842        }
 843
 844        gve_set_napi_enabled(priv);
 845}
 846
 847static void gve_tx_timeout(struct net_device *dev)
 848{
 849        struct gve_priv *priv = netdev_priv(dev);
 850
 851        gve_schedule_reset(priv);
 852        priv->tx_timeo_cnt++;
 853}
 854
 855static const struct net_device_ops gve_netdev_ops = {
 856        .ndo_start_xmit         =       gve_tx,
 857        .ndo_open               =       gve_open,
 858        .ndo_stop               =       gve_close,
 859        .ndo_get_stats64        =       gve_get_stats,
 860        .ndo_tx_timeout         =       gve_tx_timeout,
 861};
 862
 863static void gve_handle_status(struct gve_priv *priv, u32 status)
 864{
 865        if (GVE_DEVICE_STATUS_RESET_MASK & status) {
 866                dev_info(&priv->pdev->dev, "Device requested reset.\n");
 867                gve_set_do_reset(priv);
 868        }
 869}
 870
 871static void gve_handle_reset(struct gve_priv *priv)
 872{
 873        /* A service task will be scheduled at the end of probe to catch any
 874         * resets that need to happen, and we don't want to reset until
 875         * probe is done.
 876         */
 877        if (gve_get_probe_in_progress(priv))
 878                return;
 879
 880        if (gve_get_do_reset(priv)) {
 881                rtnl_lock();
 882                gve_reset(priv, false);
 883                rtnl_unlock();
 884        }
 885}
 886
 887/* Handle NIC status register changes and reset requests */
 888static void gve_service_task(struct work_struct *work)
 889{
 890        struct gve_priv *priv = container_of(work, struct gve_priv,
 891                                             service_task);
 892
 893        gve_handle_status(priv,
 894                          ioread32be(&priv->reg_bar0->device_status));
 895
 896        gve_handle_reset(priv);
 897}
 898
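/* Bring up the admin queue and, unless a reset recovery asks to skip
 * it, describe the device: clamp the MTU, count MSI-X vectors, and
 * derive the notification block and TX/RX queue limits. Ends by
 * setting up the shared device resources.
 */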
 899static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 900{
 901        int num_ntfy;
 902        int err;
 903
 904        /* Set up the adminq */
 905        err = gve_adminq_alloc(&priv->pdev->dev, priv);
 906        if (err) {
 907                dev_err(&priv->pdev->dev,
 908                        "Failed to alloc admin queue: err=%d\n", err);
 909                return err;
 910        }
 911
 912        if (skip_describe_device)
 913                goto setup_device;
 914
 915        /* Get the initial information we need from the device */
 916        err = gve_adminq_describe_device(priv);
 917        if (err) {
 918                dev_err(&priv->pdev->dev,
 919                        "Could not get device information: err=%d\n", err);
 920                goto err;
 921        }
 922        if (priv->dev->extended->max_mtu > PAGE_SIZE) {
 923                priv->dev->extended->max_mtu = PAGE_SIZE;
 924                err = gve_adminq_set_mtu(priv, priv->dev->mtu);
 925                if (err) {
  926                        netif_err(priv, drv, priv->dev, "Could not set mtu\n");
 927                        goto err;
 928                }
 929        }
 930        priv->dev->mtu = priv->dev->extended->max_mtu;
 931        num_ntfy = pci_msix_vec_count(priv->pdev);
 932        if (num_ntfy <= 0) {
 933                dev_err(&priv->pdev->dev,
 934                        "could not count MSI-x vectors: err=%d\n", num_ntfy);
 935                err = num_ntfy;
 936                goto err;
 937        } else if (num_ntfy < GVE_MIN_MSIX) {
 938                dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
 939                        GVE_MIN_MSIX, num_ntfy);
 940                err = -EINVAL;
 941                goto err;
 942        }
 943
 944        priv->num_registered_pages = 0;
 945        priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
 946        /* gvnic has one Notification Block per MSI-x vector, except for the
 947         * management vector
 948         */
 949        priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
 950        priv->mgmt_msix_idx = priv->num_ntfy_blks;
 951
 952        priv->tx_cfg.max_queues =
 953                min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
 954        priv->rx_cfg.max_queues =
 955                min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
 956
 957        priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
 958        priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
 959        if (priv->default_num_queues > 0) {
 960                priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
 961                                                priv->tx_cfg.num_queues);
 962                priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
 963                                                priv->rx_cfg.num_queues);
 964        }
 965
 966        netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
 967                   priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
 968        netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
 969                   priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
 970
 971setup_device:
 972        err = gve_setup_device_resources(priv);
 973        if (!err)
 974                return 0;
 975err:
 976        gve_adminq_free(&priv->pdev->dev, priv);
 977        return err;
 978}
 979
 980static void gve_teardown_priv_resources(struct gve_priv *priv)
 981{
 982        gve_teardown_device_resources(priv);
 983        gve_adminq_free(&priv->pdev->dev, priv);
 984}
 985
 986static void gve_trigger_reset(struct gve_priv *priv)
 987{
 988        /* Reset the device by releasing the AQ */
 989        gve_adminq_release(priv);
 990}
 991
 992static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
 993{
 994        gve_trigger_reset(priv);
 995        /* With the reset having already happened, close cannot fail */
 996        if (was_up)
 997                gve_close(priv->dev);
 998        gve_teardown_priv_resources(priv);
 999}
1000
1001static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
1002{
1003        int err;
1004
1005        err = gve_init_priv(priv, true);
1006        if (err)
1007                goto err;
1008        if (was_up) {
1009                err = gve_open(priv->dev);
1010                if (err)
1011                        goto err;
1012        }
1013        return 0;
1014err:
1015        dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
1016        gve_turndown(priv);
1017        return err;
1018}
1019
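/* Full device reset: either attempt a normal close first or go
 * straight to turndown and teardown by releasing the admin queue, then
 * rebuild everything through gve_reset_recovery().
 */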
1020int gve_reset(struct gve_priv *priv, bool attempt_teardown)
1021{
1022        bool was_up = netif_carrier_ok(priv->dev);
1023        int err;
1024
1025        dev_info(&priv->pdev->dev, "Performing reset\n");
1026        gve_clear_do_reset(priv);
1027        gve_set_reset_in_progress(priv);
1028        /* If we aren't attempting to teardown normally, just go turndown and
1029         * reset right away.
1030         */
1031        if (!attempt_teardown) {
1032                gve_turndown(priv);
1033                gve_reset_and_teardown(priv, was_up);
1034        } else {
1035                /* Otherwise attempt to close normally */
1036                if (was_up) {
1037                        err = gve_close(priv->dev);
1038                        /* If that fails reset as we did above */
1039                        if (err)
1040                                gve_reset_and_teardown(priv, was_up);
1041                }
1042                /* Clean up any remaining resources */
1043                gve_teardown_priv_resources(priv);
1044        }
1045
1046        /* Set it all back up */
1047        err = gve_reset_recovery(priv, was_up);
1048        gve_clear_reset_in_progress(priv);
1049        return err;
1050}
1051
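/* Report the driver version to the device by streaming the prefix and
 * version strings byte by byte into the driver_version register,
 * terminated with a newline.
 */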
1052static void gve_write_version(u8 __iomem *driver_version_register)
1053{
1054        const char *c = gve_version_prefix;
1055
1056        while (*c) {
1057                writeb(*c, driver_version_register);
1058                c++;
1059        }
1060
1061        c = gve_version_str;
1062        while (*c) {
1063                writeb(*c, driver_version_register);
1064                c++;
1065        }
1066        writeb('\n', driver_version_register);
1067}
1068
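/* PCI probe: enable the device, map the register and doorbell BARs,
 * read the max queue counts, allocate and configure the netdev, create
 * the service workqueue, initialize priv state and register the netdev.
 */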
1069static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1070{
1071        int max_tx_queues, max_rx_queues;
1072        struct net_device *dev;
1073        __be32 __iomem *db_bar;
1074        struct gve_registers __iomem *reg_bar;
1075        struct gve_priv *priv;
1076        int err;
1077
1078        err = pci_enable_device(pdev);
1079        if (err)
1080                return -ENXIO;
1081
1082        err = pci_request_regions(pdev, "gvnic-cfg");
1083        if (err)
1084                goto abort_with_enabled;
1085
1086        pci_set_master(pdev);
1087
1088        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1089        if (err) {
1090                dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
1091                goto abort_with_pci_region;
1092        }
1093
1094        err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1095        if (err) {
1096                dev_err(&pdev->dev,
1097                        "Failed to set consistent dma mask: err=%d\n", err);
1098                goto abort_with_pci_region;
1099        }
1100
1101        reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
1102        if (!reg_bar) {
1103                dev_err(&pdev->dev, "Failed to map pci bar!\n");
1104                err = -ENOMEM;
1105                goto abort_with_pci_region;
1106        }
1107
1108        db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
1109        if (!db_bar) {
1110                dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
1111                err = -ENOMEM;
1112                goto abort_with_reg_bar;
1113        }
1114
1115        gve_write_version(&reg_bar->driver_version);
1116        /* Get max queues to alloc etherdev */
1117        max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
1118        max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
1119        /* Alloc and setup the netdev and priv */
1120        dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
1121        if (!dev) {
1122                dev_err(&pdev->dev, "could not allocate netdev\n");
1123                goto abort_with_db_bar;
1124        }
1125        SET_NETDEV_DEV(dev, &pdev->dev);
1126        pci_set_drvdata(pdev, dev);
1127        dev->ethtool_ops = &gve_ethtool_ops;
1128        dev->netdev_ops = &gve_netdev_ops;
1129        /* advertise features */
1130        dev->hw_features = NETIF_F_HIGHDMA;
1131        dev->hw_features |= NETIF_F_SG;
1132        dev->hw_features |= NETIF_F_HW_CSUM;
1133        dev->hw_features |= NETIF_F_TSO;
1134        dev->hw_features |= NETIF_F_TSO6;
1135        dev->hw_features |= NETIF_F_TSO_ECN;
1136        dev->hw_features |= NETIF_F_RXCSUM;
1137        dev->hw_features |= NETIF_F_RXHASH;
1138        dev->features = dev->hw_features;
1139        dev->watchdog_timeo = 5 * HZ;
1140        dev->extended->min_mtu = ETH_MIN_MTU;
1141        netif_carrier_off(dev);
1142
1143        priv = netdev_priv(dev);
1144        priv->dev = dev;
1145        priv->pdev = pdev;
1146        priv->msg_enable = DEFAULT_MSG_LEVEL;
1147        priv->reg_bar0 = reg_bar;
1148        priv->db_bar2 = db_bar;
1149        priv->service_task_flags = 0x0;
1150        priv->state_flags = 0x0;
1151
1152        gve_set_probe_in_progress(priv);
1153        priv->gve_wq = alloc_ordered_workqueue("gve", 0);
1154        if (!priv->gve_wq) {
1155                dev_err(&pdev->dev, "Could not allocate workqueue\n");
1156                err = -ENOMEM;
1157                goto abort_with_netdev;
1158        }
1159        INIT_WORK(&priv->service_task, gve_service_task);
1160        priv->tx_cfg.max_queues = max_tx_queues;
1161        priv->rx_cfg.max_queues = max_rx_queues;
1162
1163        err = gve_init_priv(priv, false);
1164        if (err)
1165                goto abort_with_wq;
1166
1167        err = register_netdev(dev);
1168        if (err)
1169                goto abort_with_wq;
1170
1171        dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
1172        gve_clear_probe_in_progress(priv);
1173        queue_work(priv->gve_wq, &priv->service_task);
1174        return 0;
1175
1176abort_with_wq:
1177        destroy_workqueue(priv->gve_wq);
1178
1179abort_with_netdev:
1180        free_netdev(dev);
1181
1182abort_with_db_bar:
1183        pci_iounmap(pdev, db_bar);
1184
1185abort_with_reg_bar:
1186        pci_iounmap(pdev, reg_bar);
1187
1188abort_with_pci_region:
1189        pci_release_regions(pdev);
1190
1191abort_with_enabled:
1192        pci_disable_device(pdev);
1193        return -ENXIO;
1194}
1195
1196static void gve_remove(struct pci_dev *pdev)
1197{
1198        struct net_device *netdev = pci_get_drvdata(pdev);
1199        struct gve_priv *priv = netdev_priv(netdev);
1200        __be32 __iomem *db_bar = priv->db_bar2;
1201        void __iomem *reg_bar = priv->reg_bar0;
1202
1203        unregister_netdev(netdev);
1204        gve_teardown_priv_resources(priv);
1205        destroy_workqueue(priv->gve_wq);
1206        free_netdev(netdev);
1207        pci_iounmap(pdev, db_bar);
1208        pci_iounmap(pdev, reg_bar);
1209        pci_release_regions(pdev);
1210        pci_disable_device(pdev);
1211}
1212
1213static const struct pci_device_id gve_id_table[] = {
1214        { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
1215        { }
1216};
1217
1218static struct pci_driver gvnic_driver = {
1219        .name           = "gvnic",
1220        .id_table       = gve_id_table,
1221        .probe          = gve_probe,
1222        .remove         = gve_remove,
1223};
1224
1225module_pci_driver(gvnic_driver);
1226
1227MODULE_DEVICE_TABLE(pci, gve_id_table);
1228MODULE_AUTHOR("Google, Inc.");
1229MODULE_DESCRIPTION("gVNIC Driver");
1230MODULE_LICENSE("Dual MIT/GPL");
1231MODULE_VERSION(GVE_VERSION);
1232