// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <net/sch_generic.h>
#include "gve.h"
#include "gve_adminq.h"
#include "gve_register.h"

#define GVE_DEFAULT_RX_COPYBREAK        (256)

#define DEFAULT_MSG_LEVEL       (NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION             "1.0.0"
#define GVE_VERSION_PREFIX      "GVE-"

const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

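/* Aggregate the per-ring packet and byte counters into the netdev stats,
 * re-reading a ring's counters if its u64_stats sequence changes mid-read.
 */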
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
        struct gve_priv *priv = netdev_priv(dev);
        unsigned int start;
        int ring;

        if (priv->rx) {
                for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
                        do {
                                start =
                                  u64_stats_fetch_begin(&priv->rx[ring].statss);
                                s->rx_packets += priv->rx[ring].rpackets;
                                s->rx_bytes += priv->rx[ring].rbytes;
                        } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
                                                       start));
                }
        }
        if (priv->tx) {
                for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
                        do {
                                start =
                                  u64_stats_fetch_begin(&priv->tx[ring].statss);
                                s->tx_packets += priv->tx[ring].pkt_done;
                                s->tx_bytes += priv->tx[ring].bytes_done;
                        } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
                                                       start));
                }
        }
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
        priv->counter_array =
                dma_alloc_coherent(&priv->pdev->dev,
                                   priv->num_event_counters *
                                   sizeof(*priv->counter_array),
                                   &priv->counter_array_bus, GFP_KERNEL);
        if (!priv->counter_array)
                return -ENOMEM;

        return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
        dma_free_coherent(&priv->pdev->dev,
                          priv->num_event_counters *
                          sizeof(*priv->counter_array),
                          priv->counter_array, priv->counter_array_bus);
        priv->counter_array = NULL;
}

/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
        struct gve_priv *priv = container_of(work, struct gve_priv,
                                             stats_report_task);
        if (gve_get_do_report_stats(priv)) {
                gve_handle_report_stats(priv);
                gve_clear_do_report_stats(priv);
        }
}

static void gve_stats_report_schedule(struct gve_priv *priv)
{
        if (!gve_get_probe_in_progress(priv) &&
            !gve_get_reset_in_progress(priv)) {
                gve_set_do_report_stats(priv);
                queue_work(priv->gve_wq, &priv->stats_report_task);
        }
}

static void gve_stats_report_timer(struct timer_list *t)
{
        struct gve_priv *priv = from_timer(priv, t, stats_report_timer);

        mod_timer(&priv->stats_report_timer,
                  round_jiffies(jiffies +
                  msecs_to_jiffies(priv->stats_report_timer_period)));
        gve_stats_report_schedule(priv);
}

static int gve_alloc_stats_report(struct gve_priv *priv)
{
        int tx_stats_num, rx_stats_num;

        tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
                       priv->tx_cfg.num_queues;
        rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
                       priv->rx_cfg.num_queues;
        priv->stats_report_len = struct_size(priv->stats_report, stats,
                                             tx_stats_num + rx_stats_num);
        priv->stats_report =
                dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
                                   &priv->stats_report_bus, GFP_KERNEL);
        if (!priv->stats_report)
                return -ENOMEM;
        /* Set up timer for the report-stats task */
        timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
        priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
        return 0;
}

static void gve_free_stats_report(struct gve_priv *priv)
{
        del_timer_sync(&priv->stats_report_timer);
        dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
                          priv->stats_report, priv->stats_report_bus);
        priv->stats_report = NULL;
}

static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
        struct gve_priv *priv = arg;

        queue_work(priv->gve_wq, &priv->service_task);
        return IRQ_HANDLED;
}

static irqreturn_t gve_intr(int irq, void *arg)
{
        struct gve_notify_block *block = arg;
        struct gve_priv *priv = block->priv;

        iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
        napi_schedule_irqoff(&block->napi);
        return IRQ_HANDLED;
}

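/* NAPI poll: service TX and RX work for this notify block. If work remains,
 * keep polling by returning the full budget; otherwise complete NAPI, ack and
 * unmask the interrupt, and re-check for work that raced with the unmask.
 */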
static int gve_napi_poll(struct napi_struct *napi, int budget)
{
        struct gve_notify_block *block;
        __be32 __iomem *irq_doorbell;
        bool reschedule = false;
        struct gve_priv *priv;

        block = container_of(napi, struct gve_notify_block, napi);
        priv = block->priv;

        if (block->tx)
                reschedule |= gve_tx_poll(block, budget);
        if (block->rx)
                reschedule |= gve_rx_poll(block, budget);

        if (reschedule)
                return budget;

        napi_complete(napi);
        irq_doorbell = gve_irq_doorbell(priv, block);
        iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

        /* Double check we have no extra work.
         * Ensure unmask synchronizes with checking for work.
         */
        dma_rmb();
        if (block->tx)
                reschedule |= gve_tx_poll(block, -1);
        if (block->rx)
                reschedule |= gve_rx_poll(block, -1);
        if (reschedule && napi_reschedule(napi))
                iowrite32be(GVE_IRQ_MASK, irq_doorbell);

        return 0;
}

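/* Allocate one MSI-X vector per notify block plus one management vector.
 * If fewer vectors are granted than requested, shrink the number of notify
 * blocks and cap the TX/RX queue counts to fit, then request the IRQs and
 * spread their affinity hints across the online CPUs.
 */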
static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
        int num_vecs_requested = priv->num_ntfy_blks + 1;
        char *name = priv->dev->name;
        unsigned int active_cpus;
        int vecs_enabled;
        int i, j;
        int err;

        priv->msix_vectors = kvzalloc(num_vecs_requested *
                                      sizeof(*priv->msix_vectors), GFP_KERNEL);
        if (!priv->msix_vectors)
                return -ENOMEM;
        for (i = 0; i < num_vecs_requested; i++)
                priv->msix_vectors[i].entry = i;
        vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
                                             GVE_MIN_MSIX, num_vecs_requested);
        if (vecs_enabled < 0) {
                dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
                        GVE_MIN_MSIX, vecs_enabled);
                err = vecs_enabled;
                goto abort_with_msix_vectors;
        }
        if (vecs_enabled != num_vecs_requested) {
                int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
                int vecs_per_type = new_num_ntfy_blks / 2;
                int vecs_left = new_num_ntfy_blks % 2;

                priv->num_ntfy_blks = new_num_ntfy_blks;
                priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
                                                vecs_per_type);
                priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
                                                vecs_per_type + vecs_left);
                dev_err(&priv->pdev->dev,
                        "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
                        vecs_enabled, priv->tx_cfg.max_queues,
                        priv->rx_cfg.max_queues);
                if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
                        priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
                if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
                        priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
        }
        /* Half the notification blocks go to TX and half to RX */
        active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

        /* Setup Management Vector - the last vector */
        snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
                 name);
        err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
                          gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
        if (err) {
                dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
                goto abort_with_msix_enabled;
        }
        priv->ntfy_blocks =
                dma_alloc_coherent(&priv->pdev->dev,
                                   priv->num_ntfy_blks *
                                   sizeof(*priv->ntfy_blocks),
                                   &priv->ntfy_block_bus, GFP_KERNEL);
        if (!priv->ntfy_blocks) {
                err = -ENOMEM;
                goto abort_with_mgmt_vector;
        }
        /* Setup the other blocks - the first n-1 vectors */
        for (i = 0; i < priv->num_ntfy_blks; i++) {
                struct gve_notify_block *block = &priv->ntfy_blocks[i];
                int msix_idx = i;

                snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
                         name, i);
                block->priv = priv;
                err = request_irq(priv->msix_vectors[msix_idx].vector,
                                  gve_intr, 0, block->name, block);
                if (err) {
                        dev_err(&priv->pdev->dev,
                                "Failed to receive msix vector %d\n", i);
                        goto abort_with_some_ntfy_blocks;
                }
                irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
                                      get_cpu_mask(i % active_cpus));
        }
        return 0;
abort_with_some_ntfy_blocks:
        for (j = 0; j < i; j++) {
                struct gve_notify_block *block = &priv->ntfy_blocks[j];
                int msix_idx = j;

                irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
                                      NULL);
                free_irq(priv->msix_vectors[msix_idx].vector, block);
        }
        dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
                          sizeof(*priv->ntfy_blocks),
                          priv->ntfy_blocks, priv->ntfy_block_bus);
        priv->ntfy_blocks = NULL;
abort_with_mgmt_vector:
        free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
        pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
        kvfree(priv->msix_vectors);
        priv->msix_vectors = NULL;
        return err;
}

static void gve_free_notify_blocks(struct gve_priv *priv)
{
        int i;

        /* Free the irqs */
        for (i = 0; i < priv->num_ntfy_blks; i++) {
                struct gve_notify_block *block = &priv->ntfy_blocks[i];
                int msix_idx = i;

                irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
                                      NULL);
                free_irq(priv->msix_vectors[msix_idx].vector, block);
        }
        dma_free_coherent(&priv->pdev->dev,
                          priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
                          priv->ntfy_blocks, priv->ntfy_block_bus);
        priv->ntfy_blocks = NULL;
        free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
        pci_disable_msix(priv->pdev);
        kvfree(priv->msix_vectors);
        priv->msix_vectors = NULL;
}

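/* Allocate the event counter array, notify blocks and stats report, then
 * hand their DMA addresses to the device over the admin queue.
 */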
static int gve_setup_device_resources(struct gve_priv *priv)
{
        int err;

        err = gve_alloc_counter_array(priv);
        if (err)
                return err;
        err = gve_alloc_notify_blocks(priv);
        if (err)
                goto abort_with_counter;
        err = gve_alloc_stats_report(priv);
        if (err)
                goto abort_with_ntfy_blocks;
        err = gve_adminq_configure_device_resources(priv,
                                                    priv->counter_array_bus,
                                                    priv->num_event_counters,
                                                    priv->ntfy_block_bus,
                                                    priv->num_ntfy_blks);
        if (unlikely(err)) {
                dev_err(&priv->pdev->dev,
                        "could not setup device_resources: err=%d\n", err);
                err = -ENXIO;
                goto abort_with_stats_report;
        }
        err = gve_adminq_report_stats(priv, priv->stats_report_len,
                                      priv->stats_report_bus,
                                      GVE_STATS_REPORT_TIMER_PERIOD);
        if (err)
                dev_err(&priv->pdev->dev,
                        "Failed to report stats: err=%d\n", err);
        gve_set_device_resources_ok(priv);
        return 0;
abort_with_stats_report:
        gve_free_stats_report(priv);
abort_with_ntfy_blocks:
        gve_free_notify_blocks(priv);
abort_with_counter:
        gve_free_counter_array(priv);
        return err;
}

static void gve_trigger_reset(struct gve_priv *priv);

static void gve_teardown_device_resources(struct gve_priv *priv)
{
        int err;

        /* Tell device its resources are being freed */
        if (gve_get_device_resources_ok(priv)) {
                /* detach the stats report */
                err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
                if (err) {
                        dev_err(&priv->pdev->dev,
                                "Failed to detach stats report: err=%d\n", err);
                        gve_trigger_reset(priv);
                }
                err = gve_adminq_deconfigure_device_resources(priv);
                if (err) {
                        dev_err(&priv->pdev->dev,
                                "Could not deconfigure device resources: err=%d\n",
                                err);
                        gve_trigger_reset(priv);
                }
        }
        gve_free_counter_array(priv);
        gve_free_notify_blocks(priv);
        gve_free_stats_report(priv);
        gve_clear_device_resources_ok(priv);
}

static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
{
        struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

        netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
                       NAPI_POLL_WEIGHT);
}

static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
        struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

        netif_napi_del(&block->napi);
}

static int gve_register_qpls(struct gve_priv *priv)
{
        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
        int err;
        int i;

        for (i = 0; i < num_qpls; i++) {
                err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "failed to register queue page list %d\n",
                                  priv->qpls[i].id);
                        /* This failure will trigger a reset - no need to clean
                         * up
                         */
                        return err;
                }
        }
        return 0;
}

static int gve_unregister_qpls(struct gve_priv *priv)
{
        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
        int err;
        int i;

        for (i = 0; i < num_qpls; i++) {
                err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
                /* This failure will trigger a reset - no need to clean up */
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to unregister queue page list %d\n",
                                  priv->qpls[i].id);
                        return err;
                }
        }
        return 0;
}

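/* Ask the device over the admin queue to create the configured TX and RX
 * queues, then ring each RX doorbell to hand the prefilled buffers to the
 * NIC.
 */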
static int gve_create_rings(struct gve_priv *priv)
{
        int err;
        int i;

        err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
        if (err) {
                netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
                          priv->tx_cfg.num_queues);
                /* This failure will trigger a reset - no need to clean
                 * up
                 */
                return err;
        }
        netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
                  priv->tx_cfg.num_queues);

        err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
        if (err) {
                netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
                          priv->rx_cfg.num_queues);
                /* This failure will trigger a reset - no need to clean
                 * up
                 */
                return err;
        }
        netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
                  priv->rx_cfg.num_queues);

        /* Rx data ring has been prefilled with packet buffers at queue
         * allocation time.
         * Write the doorbell to provide descriptor slots and packet buffers
         * to the NIC.
         */
        for (i = 0; i < priv->rx_cfg.num_queues; i++)
                gve_rx_write_doorbell(priv, &priv->rx[i]);

        return 0;
}

static int gve_alloc_rings(struct gve_priv *priv)
{
        int ntfy_idx;
        int err;
        int i;

        /* Setup tx rings */
        priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
                            GFP_KERNEL);
        if (!priv->tx)
                return -ENOMEM;
        err = gve_tx_alloc_rings(priv);
        if (err)
                goto free_tx;
        /* Setup rx rings */
        priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
                            GFP_KERNEL);
        if (!priv->rx) {
                err = -ENOMEM;
                goto free_tx_queue;
        }
        err = gve_rx_alloc_rings(priv);
        if (err)
                goto free_rx;
        /* Add tx napi & init sync stats */
        for (i = 0; i < priv->tx_cfg.num_queues; i++) {
                u64_stats_init(&priv->tx[i].statss);
                ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
                gve_add_napi(priv, ntfy_idx);
        }
        /* Add rx napi & init sync stats */
        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
                u64_stats_init(&priv->rx[i].statss);
                ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
                gve_add_napi(priv, ntfy_idx);
        }

        return 0;

free_rx:
        kvfree(priv->rx);
        priv->rx = NULL;
free_tx_queue:
        gve_tx_free_rings(priv);
free_tx:
        kvfree(priv->tx);
        priv->tx = NULL;
        return err;
}

static int gve_destroy_rings(struct gve_priv *priv)
{
        int err;

        err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
        if (err) {
                netif_err(priv, drv, priv->dev,
                          "failed to destroy tx queues\n");
                /* This failure will trigger a reset - no need to clean up */
                return err;
        }
        netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
        err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
        if (err) {
                netif_err(priv, drv, priv->dev,
                          "failed to destroy rx queues\n");
                /* This failure will trigger a reset - no need to clean up */
                return err;
        }
        netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
        return 0;
}

static void gve_free_rings(struct gve_priv *priv)
{
        int ntfy_idx;
        int i;

        if (priv->tx) {
                for (i = 0; i < priv->tx_cfg.num_queues; i++) {
                        ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
                        gve_remove_napi(priv, ntfy_idx);
                }
                gve_tx_free_rings(priv);
                kvfree(priv->tx);
                priv->tx = NULL;
        }
        if (priv->rx) {
                for (i = 0; i < priv->rx_cfg.num_queues; i++) {
                        ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
                        gve_remove_napi(priv, ntfy_idx);
                }
                gve_rx_free_rings(priv);
                kvfree(priv->rx);
                priv->rx = NULL;
        }
}

int gve_alloc_page(struct gve_priv *priv, struct device *dev,
                   struct page **page, dma_addr_t *dma,
                   enum dma_data_direction dir)
{
        *page = alloc_page(GFP_KERNEL);
        if (!*page) {
                priv->page_alloc_fail++;
                return -ENOMEM;
        }
        *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
        if (dma_mapping_error(dev, *dma)) {
                priv->dma_mapping_error++;
                put_page(*page);
                return -ENOMEM;
        }
        return 0;
}

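/* Allocate and DMA-map the pages backing one queue page list, respecting the
 * device's limit on total registered pages.
 */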
static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
                                     int pages)
{
        struct gve_queue_page_list *qpl = &priv->qpls[id];
        int err;
        int i;

        if (pages + priv->num_registered_pages > priv->max_registered_pages) {
                netif_err(priv, drv, priv->dev,
                          "Reached max number of registered pages %llu > %llu\n",
                          pages + priv->num_registered_pages,
                          priv->max_registered_pages);
                return -EINVAL;
        }

        qpl->id = id;
        qpl->num_entries = 0;
        qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
        /* caller handles clean up */
        if (!qpl->pages)
                return -ENOMEM;
        qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
                                   GFP_KERNEL);
        /* caller handles clean up */
        if (!qpl->page_buses)
                return -ENOMEM;

        for (i = 0; i < pages; i++) {
                err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
                                     &qpl->page_buses[i],
                                     gve_qpl_dma_dir(priv, id));
                /* caller handles clean up */
                if (err)
                        return -ENOMEM;
                qpl->num_entries++;
        }
        priv->num_registered_pages += pages;

        return 0;
}

void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
                   enum dma_data_direction dir)
{
        if (!dma_mapping_error(dev, dma))
                dma_unmap_page(dev, dma, PAGE_SIZE, dir);
        if (page)
                put_page(page);
}

static void gve_free_queue_page_list(struct gve_priv *priv,
                                     int id)
{
        struct gve_queue_page_list *qpl = &priv->qpls[id];
        int i;

        if (!qpl->pages)
                return;
        if (!qpl->page_buses)
                goto free_pages;

        for (i = 0; i < qpl->num_entries; i++)
                gve_free_page(&priv->pdev->dev, qpl->pages[i],
                              qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

        kvfree(qpl->page_buses);
free_pages:
        kvfree(qpl->pages);
        priv->num_registered_pages -= qpl->num_entries;
}

static int gve_alloc_qpls(struct gve_priv *priv)
{
        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
        int i, j;
        int err;

        /* Raw addressing means no QPLs */
        if (priv->raw_addressing)
                return 0;

        priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
        if (!priv->qpls)
                return -ENOMEM;

        for (i = 0; i < gve_num_tx_qpls(priv); i++) {
                err = gve_alloc_queue_page_list(priv, i,
                                                priv->tx_pages_per_qpl);
                if (err)
                        goto free_qpls;
        }
        for (; i < num_qpls; i++) {
                err = gve_alloc_queue_page_list(priv, i,
                                                priv->rx_data_slot_cnt);
                if (err)
                        goto free_qpls;
        }

        priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
                                     sizeof(unsigned long) * BITS_PER_BYTE;
        priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
                                            sizeof(unsigned long), GFP_KERNEL);
        if (!priv->qpl_cfg.qpl_id_map) {
                err = -ENOMEM;
                goto free_qpls;
        }

        return 0;

free_qpls:
        for (j = 0; j <= i; j++)
                gve_free_queue_page_list(priv, j);
        kvfree(priv->qpls);
        return err;
}

static void gve_free_qpls(struct gve_priv *priv)
{
        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
        int i;

        /* Raw addressing means no QPLs */
        if (priv->raw_addressing)
                return;

        kvfree(priv->qpl_cfg.qpl_id_map);

        for (i = 0; i < num_qpls; i++)
                gve_free_queue_page_list(priv, i);

        kvfree(priv->qpls);
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
        gve_set_do_reset(priv);
        queue_work(priv->gve_wq, &priv->service_task);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);

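/* ndo_open: allocate QPLs and rings, register the page lists and create the
 * queues on the device, then enable NAPI and start the stats report timer.
 * Device-facing failures trigger a reset rather than an unwind.
 */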
static int gve_open(struct net_device *dev)
{
        struct gve_priv *priv = netdev_priv(dev);
        int err;

        err = gve_alloc_qpls(priv);
        if (err)
                return err;
        err = gve_alloc_rings(priv);
        if (err)
                goto free_qpls;

        err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
        if (err)
                goto free_rings;
        err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
        if (err)
                goto free_rings;

        err = gve_register_qpls(priv);
        if (err)
                goto reset;
        err = gve_create_rings(priv);
        if (err)
                goto reset;
        gve_set_device_rings_ok(priv);

        if (gve_get_report_stats(priv))
                mod_timer(&priv->stats_report_timer,
                          round_jiffies(jiffies +
                                msecs_to_jiffies(priv->stats_report_timer_period)));

        gve_turnup(priv);
        queue_work(priv->gve_wq, &priv->service_task);
        priv->interface_up_cnt++;
        return 0;

free_rings:
        gve_free_rings(priv);
free_qpls:
        gve_free_qpls(priv);
        return err;

reset:
        /* This must have been called from a reset due to the rtnl lock
         * so just return at this point.
         */
        if (gve_get_reset_in_progress(priv))
                return err;
        /* Otherwise reset before returning */
        gve_reset_and_teardown(priv, true);
        /* if this fails there is nothing we can do so just ignore the return */
        gve_reset_recovery(priv, false);
        /* return the original error */
        return err;
}

static int gve_close(struct net_device *dev)
{
        struct gve_priv *priv = netdev_priv(dev);
        int err;

        netif_carrier_off(dev);
        if (gve_get_device_rings_ok(priv)) {
                gve_turndown(priv);
                err = gve_destroy_rings(priv);
                if (err)
                        goto err;
                err = gve_unregister_qpls(priv);
                if (err)
                        goto err;
                gve_clear_device_rings_ok(priv);
        }
        del_timer_sync(&priv->stats_report_timer);

        gve_free_rings(priv);
        gve_free_qpls(priv);
        priv->interface_down_cnt++;
        return 0;

err:
        /* This must have been called from a reset due to the rtnl lock
         * so just return at this point.
         */
        if (gve_get_reset_in_progress(priv))
                return err;
        /* Otherwise reset before returning */
        gve_reset_and_teardown(priv, true);
        return gve_reset_recovery(priv, false);
}

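/* Apply a new TX/RX queue configuration. If the interface is up, close it,
 * install the new config and reopen; otherwise just record the config for
 * the next open.
 */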
int gve_adjust_queues(struct gve_priv *priv,
                      struct gve_queue_config new_rx_config,
                      struct gve_queue_config new_tx_config)
{
        int err;

        if (netif_carrier_ok(priv->dev)) {
                /* To make this process as simple as possible we teardown the
                 * device, set the new configuration, and then bring the device
                 * up again.
                 */
                err = gve_close(priv->dev);
                /* we have already tried to reset in close,
                 * just fail at this point
                 */
                if (err)
                        return err;
                priv->tx_cfg = new_tx_config;
                priv->rx_cfg = new_rx_config;

                err = gve_open(priv->dev);
                if (err)
                        goto err;

                return 0;
        }
        /* Set the config for the next up. */
        priv->tx_cfg = new_tx_config;
        priv->rx_cfg = new_rx_config;

        return 0;
err:
        netif_err(priv, drv, priv->dev,
                  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
        gve_turndown(priv);
        return err;
}

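/* Quiesce the data path: drop the carrier, disable NAPI on every notify
 * block and stop the TX queues.
 */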
static void gve_turndown(struct gve_priv *priv)
{
        int idx;

        if (netif_carrier_ok(priv->dev))
                netif_carrier_off(priv->dev);

        if (!gve_get_napi_enabled(priv))
                return;

        /* Disable napi to prevent more work from coming in */
        for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
                int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

                napi_disable(&block->napi);
        }
        for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
                int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

                napi_disable(&block->napi);
        }

        /* Stop tx queues */
        netif_tx_disable(priv->dev);

        gve_clear_napi_enabled(priv);
        gve_clear_report_stats(priv);
}

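/* Restart the data path: start the TX queues, re-enable NAPI and unmask the
 * interrupt for every notify block by writing 0 to its doorbell.
 */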
static void gve_turnup(struct gve_priv *priv)
{
        int idx;

        /* Start the tx queues */
        netif_tx_start_all_queues(priv->dev);

        /* Enable napi and unmask interrupts for all queues */
        for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
                int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

                napi_enable(&block->napi);
                iowrite32be(0, gve_irq_doorbell(priv, block));
        }
        for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
                int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

                napi_enable(&block->napi);
                iowrite32be(0, gve_irq_doorbell(priv, block));
        }

        gve_set_napi_enabled(priv);
}

static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
        struct gve_priv *priv = netdev_priv(dev);

        gve_schedule_reset(priv);
        priv->tx_timeo_cnt++;
}

static const struct net_device_ops gve_netdev_ops = {
        .ndo_start_xmit         =       gve_tx,
        .ndo_open               =       gve_open,
        .ndo_stop               =       gve_close,
        .ndo_get_stats64        =       gve_get_stats,
        .ndo_tx_timeout         =       gve_tx_timeout,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
        if (GVE_DEVICE_STATUS_RESET_MASK & status) {
                dev_info(&priv->pdev->dev, "Device requested reset.\n");
                gve_set_do_reset(priv);
        }
        if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
                priv->stats_report_trigger_cnt++;
                gve_set_do_report_stats(priv);
        }
}

static void gve_handle_reset(struct gve_priv *priv)
{
        /* A service task will be scheduled at the end of probe to catch any
         * resets that need to happen, and we don't want to reset until
         * probe is done.
         */
        if (gve_get_probe_in_progress(priv))
                return;

        if (gve_get_do_reset(priv)) {
                rtnl_lock();
                gve_reset(priv, false);
                rtnl_unlock();
        }
}

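/* Fill the DMA-shared stats report buffer with per-queue TX and RX stats so
 * the NIC can read them.
 */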
void gve_handle_report_stats(struct gve_priv *priv)
{
        struct stats *stats = priv->stats_report->stats;
        int idx, stats_idx = 0;
        unsigned int start = 0;
        u64 tx_bytes;

        if (!gve_get_report_stats(priv))
                return;

        be64_add_cpu(&priv->stats_report->written_count, 1);
        /* tx stats */
        if (priv->tx) {
                for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
                        do {
                                start = u64_stats_fetch_begin(&priv->tx[idx].statss);
                                tx_bytes = priv->tx[idx].bytes_done;
                        } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
                        stats[stats_idx++] = (struct stats) {
                                .stat_name = cpu_to_be32(TX_WAKE_CNT),
                                .value = cpu_to_be64(priv->tx[idx].wake_queue),
                                .queue_id = cpu_to_be32(idx),
                        };
                        stats[stats_idx++] = (struct stats) {
                                .stat_name = cpu_to_be32(TX_STOP_CNT),
                                .value = cpu_to_be64(priv->tx[idx].stop_queue),
                                .queue_id = cpu_to_be32(idx),
                        };
                        stats[stats_idx++] = (struct stats) {
                                .stat_name = cpu_to_be32(TX_FRAMES_SENT),
                                .value = cpu_to_be64(priv->tx[idx].req),
                                .queue_id = cpu_to_be32(idx),
                        };
                        stats[stats_idx++] = (struct stats) {
                                .stat_name = cpu_to_be32(TX_BYTES_SENT),
                                .value = cpu_to_be64(tx_bytes),
                                .queue_id = cpu_to_be32(idx),
                        };
                        stats[stats_idx++] = (struct stats) {
                                .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
                                .value = cpu_to_be64(priv->tx[idx].done),
                                .queue_id = cpu_to_be32(idx),
                        };
                }
        }
        /* rx stats */
        if (priv->rx) {
                for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
                        stats[stats_idx++] = (struct stats) {
                                .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
                                .value = cpu_to_be64(priv->rx[idx].desc.seqno),
                                .queue_id = cpu_to_be32(idx),
                        };
                        stats[stats_idx++] = (struct stats) {
                                .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
                                .value = cpu_to_be64(priv->rx[idx].fill_cnt),
                                .queue_id = cpu_to_be32(idx),
                        };
                }
        }
}

static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
        if (!gve_get_napi_enabled(priv))
                return;

        if (link_status == netif_carrier_ok(priv->dev))
                return;

        if (link_status) {
                netdev_info(priv->dev, "Device link is up.\n");
                netif_carrier_on(priv->dev);
        } else {
                netdev_info(priv->dev, "Device link is down.\n");
                netif_carrier_off(priv->dev);
        }
}

/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
        struct gve_priv *priv = container_of(work, struct gve_priv,
                                             service_task);
        u32 status = ioread32be(&priv->reg_bar0->device_status);

        gve_handle_status(priv, status);

        gve_handle_reset(priv);
        gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}

static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
        int num_ntfy;
        int err;

        /* Set up the adminq */
        err = gve_adminq_alloc(&priv->pdev->dev, priv);
        if (err) {
                dev_err(&priv->pdev->dev,
                        "Failed to alloc admin queue: err=%d\n", err);
                return err;
        }

        if (skip_describe_device)
                goto setup_device;

        priv->raw_addressing = false;
        /* Get the initial information we need from the device */
        err = gve_adminq_describe_device(priv);
        if (err) {
                dev_err(&priv->pdev->dev,
                        "Could not get device information: err=%d\n", err);
                goto err;
        }
        if (priv->dev->max_mtu > PAGE_SIZE) {
                priv->dev->max_mtu = PAGE_SIZE;
                err = gve_adminq_set_mtu(priv, priv->dev->mtu);
                if (err) {
                        dev_err(&priv->pdev->dev, "Could not set mtu");
                        goto err;
                }
        }
        priv->dev->mtu = priv->dev->max_mtu;
        num_ntfy = pci_msix_vec_count(priv->pdev);
        if (num_ntfy <= 0) {
                dev_err(&priv->pdev->dev,
                        "could not count MSI-x vectors: err=%d\n", num_ntfy);
                err = num_ntfy;
                goto err;
        } else if (num_ntfy < GVE_MIN_MSIX) {
                dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
                        GVE_MIN_MSIX, num_ntfy);
                err = -EINVAL;
                goto err;
        }

        priv->num_registered_pages = 0;
        priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
        /* gvnic has one Notification Block per MSI-x vector, except for the
         * management vector
         */
        priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
        priv->mgmt_msix_idx = priv->num_ntfy_blks;

        priv->tx_cfg.max_queues =
                min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
        priv->rx_cfg.max_queues =
                min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

        priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
        priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
        if (priv->default_num_queues > 0) {
                priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
                                                priv->tx_cfg.num_queues);
                priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
                                                priv->rx_cfg.num_queues);
        }

        dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
                 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
        dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
                 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

setup_device:
        err = gve_setup_device_resources(priv);
        if (!err)
                return 0;
err:
        gve_adminq_free(&priv->pdev->dev, priv);
        return err;
}

static void gve_teardown_priv_resources(struct gve_priv *priv)
{
        gve_teardown_device_resources(priv);
        gve_adminq_free(&priv->pdev->dev, priv);
}

static void gve_trigger_reset(struct gve_priv *priv)
{
        /* Reset the device by releasing the AQ */
        gve_adminq_release(priv);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
        gve_trigger_reset(priv);
        /* With the reset having already happened, close cannot fail */
        if (was_up)
                gve_close(priv->dev);
        gve_teardown_priv_resources(priv);
}

static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
        int err;

        err = gve_init_priv(priv, true);
        if (err)
                goto err;
        if (was_up) {
                err = gve_open(priv->dev);
                if (err)
                        goto err;
        }
        return 0;
err:
        dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
        gve_turndown(priv);
        return err;
}

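/* Reset the device. If attempt_teardown is set, try a normal close and
 * resource teardown first; otherwise turn the device down and tear it down
 * immediately. Then rebuild the private resources and reopen if the
 * interface was up.
 */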
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
        bool was_up = netif_carrier_ok(priv->dev);
        int err;

        dev_info(&priv->pdev->dev, "Performing reset\n");
        gve_clear_do_reset(priv);
        gve_set_reset_in_progress(priv);
        /* If we aren't attempting to teardown normally, just go turndown and
         * reset right away.
         */
        if (!attempt_teardown) {
                gve_turndown(priv);
                gve_reset_and_teardown(priv, was_up);
        } else {
                /* Otherwise attempt to close normally */
                if (was_up) {
                        err = gve_close(priv->dev);
                        /* If that fails reset as we did above */
                        if (err)
                                gve_reset_and_teardown(priv, was_up);
                }
                /* Clean up any remaining resources */
                gve_teardown_priv_resources(priv);
        }

        /* Set it all back up */
        err = gve_reset_recovery(priv, was_up);
        gve_clear_reset_in_progress(priv);
        priv->reset_cnt++;
        priv->interface_up_cnt = 0;
        priv->interface_down_cnt = 0;
        priv->stats_report_trigger_cnt = 0;
        return err;
}

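/* Report the driver version to the device by writing the prefix and version
 * strings byte by byte to the version register, terminated with '\n'.
 */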
static void gve_write_version(u8 __iomem *driver_version_register)
{
        const char *c = gve_version_prefix;

        while (*c) {
                writeb(*c, driver_version_register);
                c++;
        }

        c = gve_version_str;
        while (*c) {
                writeb(*c, driver_version_register);
                c++;
        }
        writeb('\n', driver_version_register);
}

static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
        int max_tx_queues, max_rx_queues;
        struct net_device *dev;
        __be32 __iomem *db_bar;
        struct gve_registers __iomem *reg_bar;
        struct gve_priv *priv;
        int err;

        err = pci_enable_device(pdev);
        if (err)
                return -ENXIO;

        err = pci_request_regions(pdev, "gvnic-cfg");
        if (err)
                goto abort_with_enabled;

        pci_set_master(pdev);

        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (err) {
                dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
                goto abort_with_pci_region;
        }

        err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
        if (err) {
                dev_err(&pdev->dev,
                        "Failed to set consistent dma mask: err=%d\n", err);
                goto abort_with_pci_region;
        }

        reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
        if (!reg_bar) {
                dev_err(&pdev->dev, "Failed to map pci bar!\n");
                err = -ENOMEM;
                goto abort_with_pci_region;
        }

        db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
        if (!db_bar) {
                dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
                err = -ENOMEM;
                goto abort_with_reg_bar;
        }

        gve_write_version(&reg_bar->driver_version);
        /* Get max queues to alloc etherdev */
        max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
        max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
        /* Alloc and setup the netdev and priv */
        dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
        if (!dev) {
                dev_err(&pdev->dev, "could not allocate netdev\n");
                goto abort_with_db_bar;
        }
        SET_NETDEV_DEV(dev, &pdev->dev);
        pci_set_drvdata(pdev, dev);
        dev->ethtool_ops = &gve_ethtool_ops;
        dev->netdev_ops = &gve_netdev_ops;
        /* advertise features */
        dev->hw_features = NETIF_F_HIGHDMA;
        dev->hw_features |= NETIF_F_SG;
        dev->hw_features |= NETIF_F_HW_CSUM;
        dev->hw_features |= NETIF_F_TSO;
        dev->hw_features |= NETIF_F_TSO6;
        dev->hw_features |= NETIF_F_TSO_ECN;
        dev->hw_features |= NETIF_F_RXCSUM;
        dev->hw_features |= NETIF_F_RXHASH;
        dev->features = dev->hw_features;
        dev->watchdog_timeo = 5 * HZ;
        dev->min_mtu = ETH_MIN_MTU;
        netif_carrier_off(dev);

        priv = netdev_priv(dev);
        priv->dev = dev;
        priv->pdev = pdev;
        priv->msg_enable = DEFAULT_MSG_LEVEL;
        priv->reg_bar0 = reg_bar;
        priv->db_bar2 = db_bar;
        priv->service_task_flags = 0x0;
        priv->state_flags = 0x0;
        priv->ethtool_flags = 0x0;

        gve_set_probe_in_progress(priv);
        priv->gve_wq = alloc_ordered_workqueue("gve", 0);
        if (!priv->gve_wq) {
                dev_err(&pdev->dev, "Could not allocate workqueue");
                err = -ENOMEM;
                goto abort_with_netdev;
        }
        INIT_WORK(&priv->service_task, gve_service_task);
        INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
        priv->tx_cfg.max_queues = max_tx_queues;
        priv->rx_cfg.max_queues = max_rx_queues;

        err = gve_init_priv(priv, false);
        if (err)
                goto abort_with_wq;

        err = register_netdev(dev);
        if (err)
                goto abort_with_wq;

        dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
        gve_clear_probe_in_progress(priv);
        queue_work(priv->gve_wq, &priv->service_task);
        return 0;

abort_with_wq:
        destroy_workqueue(priv->gve_wq);

abort_with_netdev:
        free_netdev(dev);

abort_with_db_bar:
        pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
        pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
        pci_release_regions(pdev);

abort_with_enabled:
        pci_disable_device(pdev);
        return -ENXIO;
}

static void gve_remove(struct pci_dev *pdev)
{
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct gve_priv *priv = netdev_priv(netdev);
        __be32 __iomem *db_bar = priv->db_bar2;
        void __iomem *reg_bar = priv->reg_bar0;

        unregister_netdev(netdev);
        gve_teardown_priv_resources(priv);
        destroy_workqueue(priv->gve_wq);
        free_netdev(netdev);
        pci_iounmap(pdev, db_bar);
        pci_iounmap(pdev, reg_bar);
        pci_release_regions(pdev);
        pci_disable_device(pdev);
}

static const struct pci_device_id gve_id_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
        { }
};

static struct pci_driver gvnic_driver = {
        .name           = "gvnic",
        .id_table       = gve_id_table,
        .probe          = gve_probe,
        .remove         = gve_remove,
};

module_pci_driver(gvnic_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("gVNIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);