linux/drivers/net/ethernet/google/gve/gve_main.c
   1// SPDX-License-Identifier: (GPL-2.0 OR MIT)
   2/* Google virtual Ethernet (gve) driver
   3 *
   4 * Copyright (C) 2015-2021 Google, Inc.
   5 */
   6
   7#include <linux/cpumask.h>
   8#include <linux/etherdevice.h>
   9#include <linux/interrupt.h>
  10#include <linux/module.h>
  11#include <linux/pci.h>
  12#include <linux/sched.h>
  13#include <linux/timer.h>
  14#include <linux/workqueue.h>
  15#include <net/sch_generic.h>
  16#include "gve.h"
  17#include "gve_dqo.h"
  18#include "gve_adminq.h"
  19#include "gve_register.h"
  20
  21#define GVE_DEFAULT_RX_COPYBREAK        (256)
  22
  23#define DEFAULT_MSG_LEVEL       (NETIF_MSG_DRV | NETIF_MSG_LINK)
  24#define GVE_VERSION             "1.0.0"
  25#define GVE_VERSION_PREFIX      "GVE-"
  26
  27const char gve_version_str[] = GVE_VERSION;
  28static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
  29
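/* Dispatch transmit to the GQI or DQO datapath based on the queue format. */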
  30static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
  31{
  32        struct gve_priv *priv = netdev_priv(dev);
  33
  34        if (gve_is_gqi(priv))
  35                return gve_tx(skb, dev);
  36        else
  37                return gve_tx_dqo(skb, dev);
  38}
  39
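/* Sum per-ring packet and byte counters into the netdev stats. */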
  40static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
  41{
  42        struct gve_priv *priv = netdev_priv(dev);
  43        unsigned int start;
  44        u64 packets, bytes;
  45        int ring;
  46
  47        if (priv->rx) {
  48                for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
  49                        do {
  50                                start =
  51                                  u64_stats_fetch_begin(&priv->rx[ring].statss);
  52                                packets = priv->rx[ring].rpackets;
  53                                bytes = priv->rx[ring].rbytes;
  54                        } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
  55                                                       start));
  56                        s->rx_packets += packets;
  57                        s->rx_bytes += bytes;
  58                }
  59        }
  60        if (priv->tx) {
  61                for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
  62                        do {
  63                                start =
  64                                  u64_stats_fetch_begin(&priv->tx[ring].statss);
  65                                packets = priv->tx[ring].pkt_done;
  66                                bytes = priv->tx[ring].bytes_done;
  67                        } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
  68                                                       start));
  69                        s->tx_packets += packets;
  70                        s->tx_bytes += bytes;
  71                }
  72        }
  73}
  74
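/* Allocate the DMA-coherent event counter array shared with the device. */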
  75static int gve_alloc_counter_array(struct gve_priv *priv)
  76{
  77        priv->counter_array =
  78                dma_alloc_coherent(&priv->pdev->dev,
  79                                   priv->num_event_counters *
  80                                   sizeof(*priv->counter_array),
  81                                   &priv->counter_array_bus, GFP_KERNEL);
  82        if (!priv->counter_array)
  83                return -ENOMEM;
  84
  85        return 0;
  86}
  87
  88static void gve_free_counter_array(struct gve_priv *priv)
  89{
  90        if (!priv->counter_array)
  91                return;
  92
  93        dma_free_coherent(&priv->pdev->dev,
  94                          priv->num_event_counters *
  95                          sizeof(*priv->counter_array),
  96                          priv->counter_array, priv->counter_array_bus);
  97        priv->counter_array = NULL;
  98}
  99
 100/* NIC requests to report stats */
 101static void gve_stats_report_task(struct work_struct *work)
 102{
 103        struct gve_priv *priv = container_of(work, struct gve_priv,
 104                                             stats_report_task);
 105        if (gve_get_do_report_stats(priv)) {
 106                gve_handle_report_stats(priv);
 107                gve_clear_do_report_stats(priv);
 108        }
 109}
 110
 111static void gve_stats_report_schedule(struct gve_priv *priv)
 112{
 113        if (!gve_get_probe_in_progress(priv) &&
 114            !gve_get_reset_in_progress(priv)) {
 115                gve_set_do_report_stats(priv);
 116                queue_work(priv->gve_wq, &priv->stats_report_task);
 117        }
 118}
 119
 120static void gve_stats_report_timer(struct timer_list *t)
 121{
 122        struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
 123
 124        mod_timer(&priv->stats_report_timer,
 125                  round_jiffies(jiffies +
 126                  msecs_to_jiffies(priv->stats_report_timer_period)));
 127        gve_stats_report_schedule(priv);
 128}
 129
 130static int gve_alloc_stats_report(struct gve_priv *priv)
 131{
 132        int tx_stats_num, rx_stats_num;
 133
 134        tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
 135                       priv->tx_cfg.num_queues;
 136        rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
 137                       priv->rx_cfg.num_queues;
 138        priv->stats_report_len = struct_size(priv->stats_report, stats,
 139                                             tx_stats_num + rx_stats_num);
 140        priv->stats_report =
 141                dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
 142                                   &priv->stats_report_bus, GFP_KERNEL);
 143        if (!priv->stats_report)
 144                return -ENOMEM;
 145        /* Set up timer for the report-stats task */
 146        timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
 147        priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
 148        return 0;
 149}
 150
 151static void gve_free_stats_report(struct gve_priv *priv)
 152{
 153        if (!priv->stats_report)
 154                return;
 155
 156        del_timer_sync(&priv->stats_report_timer);
 157        dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
 158                          priv->stats_report, priv->stats_report_bus);
 159        priv->stats_report = NULL;
 160}
 161
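/* Management interrupt: schedule the service task to read device status. */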
 162static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
 163{
 164        struct gve_priv *priv = arg;
 165
 166        queue_work(priv->gve_wq, &priv->service_task);
 167        return IRQ_HANDLED;
 168}
 169
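/* GQI queue interrupt: mask the vector via its doorbell and schedule NAPI. */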
 170static irqreturn_t gve_intr(int irq, void *arg)
 171{
 172        struct gve_notify_block *block = arg;
 173        struct gve_priv *priv = block->priv;
 174
 175        iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
 176        napi_schedule_irqoff(&block->napi);
 177        return IRQ_HANDLED;
 178}
 179
 180static irqreturn_t gve_intr_dqo(int irq, void *arg)
 181{
 182        struct gve_notify_block *block = arg;
 183
 184        /* Interrupts are automatically masked */
 185        napi_schedule_irqoff(&block->napi);
 186        return IRQ_HANDLED;
 187}
 188
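/* GQI NAPI poll: run TX/RX work, then unmask the IRQ and recheck for work
 * that raced with the unmask before returning.
 */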
 189static int gve_napi_poll(struct napi_struct *napi, int budget)
 190{
 191        struct gve_notify_block *block;
 192        __be32 __iomem *irq_doorbell;
 193        bool reschedule = false;
 194        struct gve_priv *priv;
 195
 196        block = container_of(napi, struct gve_notify_block, napi);
 197        priv = block->priv;
 198
 199        if (block->tx)
 200                reschedule |= gve_tx_poll(block, budget);
 201        if (block->rx)
 202                reschedule |= gve_rx_poll(block, budget);
 203
 204        if (reschedule)
 205                return budget;
 206
 207        napi_complete(napi);
 208        irq_doorbell = gve_irq_doorbell(priv, block);
 209        iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
 210
 211        /* Double check we have no extra work.
 212         * Ensure unmask synchronizes with checking for work.
 213         */
 214        mb();
 215        if (block->tx)
 216                reschedule |= gve_tx_poll(block, -1);
 217        if (block->rx)
 218                reschedule |= gve_rx_poll(block, -1);
 219        if (reschedule && napi_reschedule(napi))
 220                iowrite32be(GVE_IRQ_MASK, irq_doorbell);
 221
 222        return 0;
 223}
 224
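/* DQO NAPI poll: clear the MSI-X pending bit, service TX/RX, and re-enable
 * the interrupt when all work fit within the budget.
 */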
 225static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
 226{
 227        struct gve_notify_block *block =
 228                container_of(napi, struct gve_notify_block, napi);
 229        struct gve_priv *priv = block->priv;
 230        bool reschedule = false;
 231        int work_done = 0;
 232
 233        /* Clear PCI MSI-X Pending Bit Array (PBA)
 234         *
 235         * This bit is set if an interrupt event occurs while the vector is
 236         * masked. If this bit is set and we reenable the interrupt, it will
 237         * fire again. Since we're just about to poll the queue state, we don't
 238         * need it to fire again.
 239         *
 240         * Under high softirq load, it's possible that the interrupt condition
 241         * is triggered twice before we got the chance to process it.
 242         */
 243        gve_write_irq_doorbell_dqo(priv, block,
 244                                   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO);
 245
 246        if (block->tx)
 247                reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
 248
 249        if (block->rx) {
 250                work_done = gve_rx_poll_dqo(block, budget);
 251                reschedule |= work_done == budget;
 252        }
 253
 254        if (reschedule)
 255                return budget;
 256
 257        if (likely(napi_complete_done(napi, work_done))) {
 258                /* Enable interrupts again.
 259                 *
 260                 * We don't need to repoll afterwards because HW supports the
 261                 * PCI MSI-X PBA feature.
 262                 *
 263                 * Another interrupt would be triggered if a new event came in
 264                 * since the last one.
 265                 */
 266                gve_write_irq_doorbell_dqo(priv, block,
 267                                           GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
 268        }
 269
 270        return work_done;
 271}
 272
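/* Enable MSI-X, request the management vector plus one vector per
 * notification block, and spread block IRQ affinity hints across online CPUs.
 */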
 273static int gve_alloc_notify_blocks(struct gve_priv *priv)
 274{
 275        int num_vecs_requested = priv->num_ntfy_blks + 1;
 276        char *name = priv->dev->name;
 277        unsigned int active_cpus;
 278        int vecs_enabled;
 279        int i, j;
 280        int err;
 281
 282        priv->msix_vectors = kvzalloc(num_vecs_requested *
 283                                      sizeof(*priv->msix_vectors), GFP_KERNEL);
 284        if (!priv->msix_vectors)
 285                return -ENOMEM;
 286        for (i = 0; i < num_vecs_requested; i++)
 287                priv->msix_vectors[i].entry = i;
 288        vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
 289                                             GVE_MIN_MSIX, num_vecs_requested);
 290        if (vecs_enabled < 0) {
 291                dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
 292                        GVE_MIN_MSIX, vecs_enabled);
 293                err = vecs_enabled;
 294                goto abort_with_msix_vectors;
 295        }
 296        if (vecs_enabled != num_vecs_requested) {
 297                int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
 298                int vecs_per_type = new_num_ntfy_blks / 2;
 299                int vecs_left = new_num_ntfy_blks % 2;
 300
 301                priv->num_ntfy_blks = new_num_ntfy_blks;
 302                priv->mgmt_msix_idx = priv->num_ntfy_blks;
 303                priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
 304                                                vecs_per_type);
 305                priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
 306                                                vecs_per_type + vecs_left);
 307                dev_err(&priv->pdev->dev,
 308                        "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
 309                        vecs_enabled, priv->tx_cfg.max_queues,
 310                        priv->rx_cfg.max_queues);
 311                if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
 312                        priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
 313                if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
 314                        priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
 315        }
 316        /* Half the notification blocks go to TX and half to RX */
 317        active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
 318
 319        /* Setup Management Vector - the last vector */
 320        snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
 321                 name);
 322        err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
 323                          gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
 324        if (err) {
 325                dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
 326                goto abort_with_msix_enabled;
 327        }
 328        priv->ntfy_blocks =
 329                dma_alloc_coherent(&priv->pdev->dev,
 330                                   priv->num_ntfy_blks *
 331                                   sizeof(*priv->ntfy_blocks),
 332                                   &priv->ntfy_block_bus, GFP_KERNEL);
 333        if (!priv->ntfy_blocks) {
 334                err = -ENOMEM;
 335                goto abort_with_mgmt_vector;
 336        }
 337        /* Setup the other blocks - the first n-1 vectors */
 338        for (i = 0; i < priv->num_ntfy_blks; i++) {
 339                struct gve_notify_block *block = &priv->ntfy_blocks[i];
 340                int msix_idx = i;
 341
 342                snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
 343                         name, i);
 344                block->priv = priv;
 345                err = request_irq(priv->msix_vectors[msix_idx].vector,
 346                                  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
 347                                  0, block->name, block);
 348                if (err) {
 349                        dev_err(&priv->pdev->dev,
 350                                "Failed to receive msix vector %d\n", i);
 351                        goto abort_with_some_ntfy_blocks;
 352                }
 353                irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 354                                      get_cpu_mask(i % active_cpus));
 355        }
 356        return 0;
 357abort_with_some_ntfy_blocks:
 358        for (j = 0; j < i; j++) {
 359                struct gve_notify_block *block = &priv->ntfy_blocks[j];
 360                int msix_idx = j;
 361
 362                irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 363                                      NULL);
 364                free_irq(priv->msix_vectors[msix_idx].vector, block);
 365        }
 366        dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
 367                          sizeof(*priv->ntfy_blocks),
 368                          priv->ntfy_blocks, priv->ntfy_block_bus);
 369        priv->ntfy_blocks = NULL;
 370abort_with_mgmt_vector:
 371        free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 372abort_with_msix_enabled:
 373        pci_disable_msix(priv->pdev);
 374abort_with_msix_vectors:
 375        kvfree(priv->msix_vectors);
 376        priv->msix_vectors = NULL;
 377        return err;
 378}
 379
 380static void gve_free_notify_blocks(struct gve_priv *priv)
 381{
 382        int i;
 383
 384        if (!priv->msix_vectors)
 385                return;
 386
 387        /* Free the irqs */
 388        for (i = 0; i < priv->num_ntfy_blks; i++) {
 389                struct gve_notify_block *block = &priv->ntfy_blocks[i];
 390                int msix_idx = i;
 391
 392                irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 393                                      NULL);
 394                free_irq(priv->msix_vectors[msix_idx].vector, block);
 395        }
 396        free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 397        dma_free_coherent(&priv->pdev->dev,
 398                          priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
 399                          priv->ntfy_blocks, priv->ntfy_block_bus);
 400        priv->ntfy_blocks = NULL;
 401        pci_disable_msix(priv->pdev);
 402        kvfree(priv->msix_vectors);
 403        priv->msix_vectors = NULL;
 404}
 405
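/* Allocate counters, notification blocks and the stats report, then describe
 * them to the device over the admin queue.
 */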
 406static int gve_setup_device_resources(struct gve_priv *priv)
 407{
 408        int err;
 409
 410        err = gve_alloc_counter_array(priv);
 411        if (err)
 412                return err;
 413        err = gve_alloc_notify_blocks(priv);
 414        if (err)
 415                goto abort_with_counter;
 416        err = gve_alloc_stats_report(priv);
 417        if (err)
 418                goto abort_with_ntfy_blocks;
 419        err = gve_adminq_configure_device_resources(priv,
 420                                                    priv->counter_array_bus,
 421                                                    priv->num_event_counters,
 422                                                    priv->ntfy_block_bus,
 423                                                    priv->num_ntfy_blks);
 424        if (unlikely(err)) {
 425                dev_err(&priv->pdev->dev,
 426                        "could not setup device_resources: err=%d\n", err);
 427                err = -ENXIO;
 428                goto abort_with_stats_report;
 429        }
 430
 431        if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
 432                priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
 433                                               GFP_KERNEL);
 434                if (!priv->ptype_lut_dqo) {
 435                        err = -ENOMEM;
 436                        goto abort_with_stats_report;
 437                }
 438                err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
 439                if (err) {
 440                        dev_err(&priv->pdev->dev,
 441                                "Failed to get ptype map: err=%d\n", err);
 442                        goto abort_with_ptype_lut;
 443                }
 444        }
 445
 446        err = gve_adminq_report_stats(priv, priv->stats_report_len,
 447                                      priv->stats_report_bus,
 448                                      GVE_STATS_REPORT_TIMER_PERIOD);
 449        if (err)
 450                dev_err(&priv->pdev->dev,
 451                        "Failed to report stats: err=%d\n", err);
 452        gve_set_device_resources_ok(priv);
 453        return 0;
 454
 455abort_with_ptype_lut:
 456        kvfree(priv->ptype_lut_dqo);
 457        priv->ptype_lut_dqo = NULL;
 458abort_with_stats_report:
 459        gve_free_stats_report(priv);
 460abort_with_ntfy_blocks:
 461        gve_free_notify_blocks(priv);
 462abort_with_counter:
 463        gve_free_counter_array(priv);
 464
 465        return err;
 466}
 467
 468static void gve_trigger_reset(struct gve_priv *priv);
 469
 470static void gve_teardown_device_resources(struct gve_priv *priv)
 471{
 472        int err;
 473
 474        /* Tell device its resources are being freed */
 475        if (gve_get_device_resources_ok(priv)) {
 476                /* detach the stats report */
 477                err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
 478                if (err) {
 479                        dev_err(&priv->pdev->dev,
 480                                "Failed to detach stats report: err=%d\n", err);
 481                        gve_trigger_reset(priv);
 482                }
 483                err = gve_adminq_deconfigure_device_resources(priv);
 484                if (err) {
 485                        dev_err(&priv->pdev->dev,
 486                                "Could not deconfigure device resources: err=%d\n",
 487                                err);
 488                        gve_trigger_reset(priv);
 489                }
 490        }
 491
 492        kvfree(priv->ptype_lut_dqo);
 493        priv->ptype_lut_dqo = NULL;
 494
 495        gve_free_counter_array(priv);
 496        gve_free_notify_blocks(priv);
 497        gve_free_stats_report(priv);
 498        gve_clear_device_resources_ok(priv);
 499}
 500
 501static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
 502                         int (*gve_poll)(struct napi_struct *, int))
 503{
 504        struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 505
 506        netif_napi_add(priv->dev, &block->napi, gve_poll,
 507                       NAPI_POLL_WEIGHT);
 508}
 509
 510static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
 511{
 512        struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 513
 514        netif_napi_del(&block->napi);
 515}
 516
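/* Register every queue page list with the device over the admin queue. */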
 517static int gve_register_qpls(struct gve_priv *priv)
 518{
 519        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
 520        int err;
 521        int i;
 522
 523        for (i = 0; i < num_qpls; i++) {
 524                err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
 525                if (err) {
 526                        netif_err(priv, drv, priv->dev,
 527                                  "failed to register queue page list %d\n",
 528                                  priv->qpls[i].id);
 529                        /* This failure will trigger a reset - no need to clean
 530                         * up
 531                         */
 532                        return err;
 533                }
 534        }
 535        return 0;
 536}
 537
 538static int gve_unregister_qpls(struct gve_priv *priv)
 539{
 540        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
 541        int err;
 542        int i;
 543
 544        for (i = 0; i < num_qpls; i++) {
 545                err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
 546                /* This failure will trigger a reset - no need to clean up */
 547                if (err) {
 548                        netif_err(priv, drv, priv->dev,
 549                                  "Failed to unregister queue page list %d\n",
 550                                  priv->qpls[i].id);
 551                        return err;
 552                }
 553        }
 554        return 0;
 555}
 556
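/* Create the TX/RX queues on the device and post the initial RX buffers. */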
 557static int gve_create_rings(struct gve_priv *priv)
 558{
 559        int err;
 560        int i;
 561
 562        err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
 563        if (err) {
 564                netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
 565                          priv->tx_cfg.num_queues);
 566                /* This failure will trigger a reset - no need to clean
 567                 * up
 568                 */
 569                return err;
 570        }
 571        netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
 572                  priv->tx_cfg.num_queues);
 573
 574        err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
 575        if (err) {
 576                netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
 577                          priv->rx_cfg.num_queues);
 578                /* This failure will trigger a reset - no need to clean
 579                 * up
 580                 */
 581                return err;
 582        }
 583        netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
 584                  priv->rx_cfg.num_queues);
 585
 586        if (gve_is_gqi(priv)) {
 587                /* Rx data ring has been prefilled with packet buffers at queue
 588                 * allocation time.
 589                 *
 590                 * Write the doorbell to provide descriptor slots and packet
 591                 * buffers to the NIC.
 592                 */
 593                for (i = 0; i < priv->rx_cfg.num_queues; i++)
 594                        gve_rx_write_doorbell(priv, &priv->rx[i]);
 595        } else {
 596                for (i = 0; i < priv->rx_cfg.num_queues; i++) {
 597                        /* Post buffers and ring doorbell. */
 598                        gve_rx_post_buffers_dqo(&priv->rx[i]);
 599                }
 600        }
 601
 602        return 0;
 603}
 604
 605static void add_napi_init_sync_stats(struct gve_priv *priv,
 606                                     int (*napi_poll)(struct napi_struct *napi,
 607                                                      int budget))
 608{
 609        int i;
 610
 611        /* Add tx napi & init sync stats */
 612        for (i = 0; i < priv->tx_cfg.num_queues; i++) {
 613                int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
 614
 615                u64_stats_init(&priv->tx[i].statss);
 616                priv->tx[i].ntfy_id = ntfy_idx;
 617                gve_add_napi(priv, ntfy_idx, napi_poll);
 618        }
 619        /* Add rx napi & init sync stats */
 620        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
 621                int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
 622
 623                u64_stats_init(&priv->rx[i].statss);
 624                priv->rx[i].ntfy_id = ntfy_idx;
 625                gve_add_napi(priv, ntfy_idx, napi_poll);
 626        }
 627}
 628
 629static void gve_tx_free_rings(struct gve_priv *priv)
 630{
 631        if (gve_is_gqi(priv)) {
 632                gve_tx_free_rings_gqi(priv);
 633        } else {
 634                gve_tx_free_rings_dqo(priv);
 635        }
 636}
 637
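/* Allocate TX/RX ring state for the configured queue counts and attach NAPI. */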
 638static int gve_alloc_rings(struct gve_priv *priv)
 639{
 640        int err;
 641
 642        /* Setup tx rings */
 643        priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
 644                            GFP_KERNEL);
 645        if (!priv->tx)
 646                return -ENOMEM;
 647
 648        if (gve_is_gqi(priv))
 649                err = gve_tx_alloc_rings(priv);
 650        else
 651                err = gve_tx_alloc_rings_dqo(priv);
 652        if (err)
 653                goto free_tx;
 654
 655        /* Setup rx rings */
 656        priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
 657                            GFP_KERNEL);
 658        if (!priv->rx) {
 659                err = -ENOMEM;
 660                goto free_tx_queue;
 661        }
 662
 663        if (gve_is_gqi(priv))
 664                err = gve_rx_alloc_rings(priv);
 665        else
 666                err = gve_rx_alloc_rings_dqo(priv);
 667        if (err)
 668                goto free_rx;
 669
 670        if (gve_is_gqi(priv))
 671                add_napi_init_sync_stats(priv, gve_napi_poll);
 672        else
 673                add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
 674
 675        return 0;
 676
 677free_rx:
 678        kvfree(priv->rx);
 679        priv->rx = NULL;
 680free_tx_queue:
 681        gve_tx_free_rings(priv);
 682free_tx:
 683        kvfree(priv->tx);
 684        priv->tx = NULL;
 685        return err;
 686}
 687
 688static int gve_destroy_rings(struct gve_priv *priv)
 689{
 690        int err;
 691
 692        err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
 693        if (err) {
 694                netif_err(priv, drv, priv->dev,
 695                          "failed to destroy tx queues\n");
 696                /* This failure will trigger a reset - no need to clean up */
 697                return err;
 698        }
 699        netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
 700        err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
 701        if (err) {
 702                netif_err(priv, drv, priv->dev,
 703                          "failed to destroy rx queues\n");
 704                /* This failure will trigger a reset - no need to clean up */
 705                return err;
 706        }
 707        netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
 708        return 0;
 709}
 710
 711static void gve_rx_free_rings(struct gve_priv *priv)
 712{
 713        if (gve_is_gqi(priv))
 714                gve_rx_free_rings_gqi(priv);
 715        else
 716                gve_rx_free_rings_dqo(priv);
 717}
 718
 719static void gve_free_rings(struct gve_priv *priv)
 720{
 721        int ntfy_idx;
 722        int i;
 723
 724        if (priv->tx) {
 725                for (i = 0; i < priv->tx_cfg.num_queues; i++) {
 726                        ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
 727                        gve_remove_napi(priv, ntfy_idx);
 728                }
 729                gve_tx_free_rings(priv);
 730                kvfree(priv->tx);
 731                priv->tx = NULL;
 732        }
 733        if (priv->rx) {
 734                for (i = 0; i < priv->rx_cfg.num_queues; i++) {
 735                        ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
 736                        gve_remove_napi(priv, ntfy_idx);
 737                }
 738                gve_rx_free_rings(priv);
 739                kvfree(priv->rx);
 740                priv->rx = NULL;
 741        }
 742}
 743
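/* Allocate a page and DMA-map it for the device, counting any failures. */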
 744int gve_alloc_page(struct gve_priv *priv, struct device *dev,
 745                   struct page **page, dma_addr_t *dma,
 746                   enum dma_data_direction dir)
 747{
 748        *page = alloc_page(GFP_KERNEL);
 749        if (!*page) {
 750                priv->page_alloc_fail++;
 751                return -ENOMEM;
 752        }
 753        *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
 754        if (dma_mapping_error(dev, *dma)) {
 755                priv->dma_mapping_error++;
 756                put_page(*page);
 757                return -ENOMEM;
 758        }
 759        return 0;
 760}
 761
 762static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
 763                                     int pages)
 764{
 765        struct gve_queue_page_list *qpl = &priv->qpls[id];
 766        int err;
 767        int i;
 768
 769        if (pages + priv->num_registered_pages > priv->max_registered_pages) {
 770                netif_err(priv, drv, priv->dev,
 771                          "Reached max number of registered pages %llu > %llu\n",
 772                          pages + priv->num_registered_pages,
 773                          priv->max_registered_pages);
 774                return -EINVAL;
 775        }
 776
 777        qpl->id = id;
 778        qpl->num_entries = 0;
 779        qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
 780        /* caller handles clean up */
 781        if (!qpl->pages)
 782                return -ENOMEM;
 783        qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
 784                                   GFP_KERNEL);
 785        /* caller handles clean up */
 786        if (!qpl->page_buses)
 787                return -ENOMEM;
 788
 789        for (i = 0; i < pages; i++) {
 790                err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
 791                                     &qpl->page_buses[i],
 792                                     gve_qpl_dma_dir(priv, id));
 793                /* caller handles clean up */
 794                if (err)
 795                        return -ENOMEM;
 796                qpl->num_entries++;
 797        }
 798        priv->num_registered_pages += pages;
 799
 800        return 0;
 801}
 802
 803void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
 804                   enum dma_data_direction dir)
 805{
 806        if (!dma_mapping_error(dev, dma))
 807                dma_unmap_page(dev, dma, PAGE_SIZE, dir);
 808        if (page)
 809                put_page(page);
 810}
 811
 812static void gve_free_queue_page_list(struct gve_priv *priv,
 813                                     int id)
 814{
 815        struct gve_queue_page_list *qpl = &priv->qpls[id];
 816        int i;
 817
 818        if (!qpl->pages)
 819                return;
 820        if (!qpl->page_buses)
 821                goto free_pages;
 822
 823        for (i = 0; i < qpl->num_entries; i++)
 824                gve_free_page(&priv->pdev->dev, qpl->pages[i],
 825                              qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
 826
 827        kvfree(qpl->page_buses);
 828free_pages:
 829        kvfree(qpl->pages);
 830        priv->num_registered_pages -= qpl->num_entries;
 831}
 832
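/* Allocate the queue page lists; raw addressing (GQI_RDA) uses none. */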
 833static int gve_alloc_qpls(struct gve_priv *priv)
 834{
 835        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
 836        int i, j;
 837        int err;
 838
 839        /* Raw addressing means no QPLs */
 840        if (priv->queue_format == GVE_GQI_RDA_FORMAT)
 841                return 0;
 842
 843        priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
 844        if (!priv->qpls)
 845                return -ENOMEM;
 846
 847        for (i = 0; i < gve_num_tx_qpls(priv); i++) {
 848                err = gve_alloc_queue_page_list(priv, i,
 849                                                priv->tx_pages_per_qpl);
 850                if (err)
 851                        goto free_qpls;
 852        }
 853        for (; i < num_qpls; i++) {
 854                err = gve_alloc_queue_page_list(priv, i,
 855                                                priv->rx_data_slot_cnt);
 856                if (err)
 857                        goto free_qpls;
 858        }
 859
 860        priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
 861                                     sizeof(unsigned long) * BITS_PER_BYTE;
 862        priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
 863                                            sizeof(unsigned long), GFP_KERNEL);
 864        if (!priv->qpl_cfg.qpl_id_map) {
 865                err = -ENOMEM;
 866                goto free_qpls;
 867        }
 868
 869        return 0;
 870
 871free_qpls:
 872        for (j = 0; j <= i; j++)
 873                gve_free_queue_page_list(priv, j);
 874        kvfree(priv->qpls);
 875        return err;
 876}
 877
 878static void gve_free_qpls(struct gve_priv *priv)
 879{
 880        int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
 881        int i;
 882
 883        /* Raw addressing means no QPLs */
 884        if (priv->queue_format == GVE_GQI_RDA_FORMAT)
 885                return;
 886
 887        kvfree(priv->qpl_cfg.qpl_id_map);
 888
 889        for (i = 0; i < num_qpls; i++)
 890                gve_free_queue_page_list(priv, i);
 891
 892        kvfree(priv->qpls);
 893}
 894
 895/* Use this to schedule a reset when the device is capable of continuing
 896 * to handle other requests in its current state. If it is not, do the
 897 * reset in the calling thread instead.
 898 */
 899void gve_schedule_reset(struct gve_priv *priv)
 900{
 901        gve_set_do_reset(priv);
 902        queue_work(priv->gve_wq, &priv->service_task);
 903}
 904
 905static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
 906static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
 907static void gve_turndown(struct gve_priv *priv);
 908static void gve_turnup(struct gve_priv *priv);
 909
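/* ndo_open: allocate QPLs and rings, register/create them on the device and
 * bring the data path up.
 */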
 910static int gve_open(struct net_device *dev)
 911{
 912        struct gve_priv *priv = netdev_priv(dev);
 913        int err;
 914
 915        err = gve_alloc_qpls(priv);
 916        if (err)
 917                return err;
 918
 919        err = gve_alloc_rings(priv);
 920        if (err)
 921                goto free_qpls;
 922
 923        err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
 924        if (err)
 925                goto free_rings;
 926        err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
 927        if (err)
 928                goto free_rings;
 929
 930        err = gve_register_qpls(priv);
 931        if (err)
 932                goto reset;
 933
 934        if (!gve_is_gqi(priv)) {
 935                /* Hard code this for now. This may be tuned in the future for
 936                 * performance.
 937                 */
 938                priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
 939        }
 940        err = gve_create_rings(priv);
 941        if (err)
 942                goto reset;
 943
 944        gve_set_device_rings_ok(priv);
 945
 946        if (gve_get_report_stats(priv))
 947                mod_timer(&priv->stats_report_timer,
 948                          round_jiffies(jiffies +
 949                                msecs_to_jiffies(priv->stats_report_timer_period)));
 950
 951        gve_turnup(priv);
 952        queue_work(priv->gve_wq, &priv->service_task);
 953        priv->interface_up_cnt++;
 954        return 0;
 955
 956free_rings:
 957        gve_free_rings(priv);
 958free_qpls:
 959        gve_free_qpls(priv);
 960        return err;
 961
 962reset:
 963        /* This must have been called from a reset due to the rtnl lock
 964         * so just return at this point.
 965         */
 966        if (gve_get_reset_in_progress(priv))
 967                return err;
 968        /* Otherwise reset before returning */
 969        gve_reset_and_teardown(priv, true);
 970        /* if this fails there is nothing we can do so just ignore the return */
 971        gve_reset_recovery(priv, false);
 972        /* return the original error */
 973        return err;
 974}
 975
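/* ndo_stop: destroy the device queues, unregister QPLs and free the rings
 * and QPLs.
 */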
 976static int gve_close(struct net_device *dev)
 977{
 978        struct gve_priv *priv = netdev_priv(dev);
 979        int err;
 980
 981        netif_carrier_off(dev);
 982        if (gve_get_device_rings_ok(priv)) {
 983                gve_turndown(priv);
 984                err = gve_destroy_rings(priv);
 985                if (err)
 986                        goto err;
 987                err = gve_unregister_qpls(priv);
 988                if (err)
 989                        goto err;
 990                gve_clear_device_rings_ok(priv);
 991        }
 992        del_timer_sync(&priv->stats_report_timer);
 993
 994        gve_free_rings(priv);
 995        gve_free_qpls(priv);
 996        priv->interface_down_cnt++;
 997        return 0;
 998
 999err:
1000        /* This must have been called from a reset due to the rtnl lock
1001         * so just return at this point.
1002         */
1003        if (gve_get_reset_in_progress(priv))
1004                return err;
1005        /* Otherwise reset before returning */
1006        gve_reset_and_teardown(priv, true);
1007        return gve_reset_recovery(priv, false);
1008}
1009
1010int gve_adjust_queues(struct gve_priv *priv,
1011                      struct gve_queue_config new_rx_config,
1012                      struct gve_queue_config new_tx_config)
1013{
1014        int err;
1015
1016        if (netif_carrier_ok(priv->dev)) {
1017                /* To make this process as simple as possible we teardown the
1018                 * device, set the new configuration, and then bring the device
1019                 * up again.
1020                 */
1021                err = gve_close(priv->dev);
1022                /* we have already tried to reset in close,
1023                 * just fail at this point
1024                 */
1025                if (err)
1026                        return err;
1027                priv->tx_cfg = new_tx_config;
1028                priv->rx_cfg = new_rx_config;
1029
1030                err = gve_open(priv->dev);
1031                if (err)
1032                        goto err;
1033
1034                return 0;
1035        }
1036        /* Set the config for the next up. */
1037        priv->tx_cfg = new_tx_config;
1038        priv->rx_cfg = new_rx_config;
1039
1040        return 0;
1041err:
1042        netif_err(priv, drv, priv->dev,
1043                  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1044        gve_turndown(priv);
1045        return err;
1046}
1047
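/* Quiesce the data path: drop the carrier, disable NAPI and stop TX queues. */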
1048static void gve_turndown(struct gve_priv *priv)
1049{
1050        int idx;
1051
1052        if (netif_carrier_ok(priv->dev))
1053                netif_carrier_off(priv->dev);
1054
1055        if (!gve_get_napi_enabled(priv))
1056                return;
1057
1058        /* Disable napi to prevent more work from coming in */
1059        for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1060                int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1061                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1062
1063                napi_disable(&block->napi);
1064        }
1065        for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1066                int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1067                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1068
1069                napi_disable(&block->napi);
1070        }
1071
1072        /* Stop tx queues */
1073        netif_tx_disable(priv->dev);
1074
1075        gve_clear_napi_enabled(priv);
1076        gve_clear_report_stats(priv);
1077}
1078
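/* Restart the data path: start TX queues, enable NAPI and re-arm the queue
 * interrupts.
 */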
1079static void gve_turnup(struct gve_priv *priv)
1080{
1081        int idx;
1082
1083        /* Start the tx queues */
1084        netif_tx_start_all_queues(priv->dev);
1085
1086        /* Enable napi and unmask interrupts for all queues */
1087        for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1088                int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1089                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1090
1091                napi_enable(&block->napi);
1092                if (gve_is_gqi(priv)) {
1093                        iowrite32be(0, gve_irq_doorbell(priv, block));
1094                } else {
1095                        u32 val = gve_set_itr_ratelimit_dqo(GVE_TX_IRQ_RATELIMIT_US_DQO);
1096
1097                        gve_write_irq_doorbell_dqo(priv, block, val);
1098                }
1099        }
1100        for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1101                int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1102                struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1103
1104                napi_enable(&block->napi);
1105                if (gve_is_gqi(priv)) {
1106                        iowrite32be(0, gve_irq_doorbell(priv, block));
1107                } else {
1108                        u32 val = gve_set_itr_ratelimit_dqo(GVE_RX_IRQ_RATELIMIT_US_DQO);
1109
1110                        gve_write_irq_doorbell_dqo(priv, block, val);
1111                }
1112        }
1113
1114        gve_set_napi_enabled(priv);
1115}
1116
1117static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1118{
1119        struct gve_priv *priv = netdev_priv(dev);
1120
1121        gve_schedule_reset(priv);
1122        priv->tx_timeo_cnt++;
1123}
1124
1125static int gve_set_features(struct net_device *netdev,
1126                            netdev_features_t features)
1127{
1128        const netdev_features_t orig_features = netdev->features;
1129        struct gve_priv *priv = netdev_priv(netdev);
1130        int err;
1131
1132        if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
1133                netdev->features ^= NETIF_F_LRO;
1134                if (netif_carrier_ok(netdev)) {
1135                        /* To make this process as simple as possible we
1136                         * teardown the device, set the new configuration,
1137                         * and then bring the device up again.
1138                         */
1139                        err = gve_close(netdev);
1140                        /* We have already tried to reset in close, just fail
1141                         * at this point.
1142                         */
1143                        if (err)
1144                                goto err;
1145
1146                        err = gve_open(netdev);
1147                        if (err)
1148                                goto err;
1149                }
1150        }
1151
1152        return 0;
1153err:
1154        /* Reverts the change on error. */
1155        netdev->features = orig_features;
1156        netif_err(priv, drv, netdev,
1157                  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
1158        return err;
1159}
1160
1161static const struct net_device_ops gve_netdev_ops = {
1162        .ndo_start_xmit         =       gve_start_xmit,
1163        .ndo_open               =       gve_open,
1164        .ndo_stop               =       gve_close,
1165        .ndo_get_stats64        =       gve_get_stats,
1166        .ndo_tx_timeout         =       gve_tx_timeout,
1167        .ndo_set_features       =       gve_set_features,
1168};
1169
1170static void gve_handle_status(struct gve_priv *priv, u32 status)
1171{
1172        if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1173                dev_info(&priv->pdev->dev, "Device requested reset.\n");
1174                gve_set_do_reset(priv);
1175        }
1176        if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1177                priv->stats_report_trigger_cnt++;
1178                gve_set_do_report_stats(priv);
1179        }
1180}
1181
1182static void gve_handle_reset(struct gve_priv *priv)
1183{
1184        /* A service task will be scheduled at the end of probe to catch any
1185         * resets that need to happen, and we don't want to reset until
1186         * probe is done.
1187         */
1188        if (gve_get_probe_in_progress(priv))
1189                return;
1190
1191        if (gve_get_do_reset(priv)) {
1192                rtnl_lock();
1193                gve_reset(priv, false);
1194                rtnl_unlock();
1195        }
1196}
1197
1198void gve_handle_report_stats(struct gve_priv *priv)
1199{
1200        struct stats *stats = priv->stats_report->stats;
1201        int idx, stats_idx = 0;
1202        unsigned int start = 0;
1203        u64 tx_bytes;
1204
1205        if (!gve_get_report_stats(priv))
1206                return;
1207
1208        be64_add_cpu(&priv->stats_report->written_count, 1);
1209        /* tx stats */
1210        if (priv->tx) {
1211                for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1212                        u32 last_completion = 0;
1213                        u32 tx_frames = 0;
1214
1215                        /* DQO doesn't currently support these metrics. */
1216                        if (gve_is_gqi(priv)) {
1217                                last_completion = priv->tx[idx].done;
1218                                tx_frames = priv->tx[idx].req;
1219                        }
1220
1221                        do {
1222                                start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1223                                tx_bytes = priv->tx[idx].bytes_done;
1224                        } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1225                        stats[stats_idx++] = (struct stats) {
1226                                .stat_name = cpu_to_be32(TX_WAKE_CNT),
1227                                .value = cpu_to_be64(priv->tx[idx].wake_queue),
1228                                .queue_id = cpu_to_be32(idx),
1229                        };
1230                        stats[stats_idx++] = (struct stats) {
1231                                .stat_name = cpu_to_be32(TX_STOP_CNT),
1232                                .value = cpu_to_be64(priv->tx[idx].stop_queue),
1233                                .queue_id = cpu_to_be32(idx),
1234                        };
1235                        stats[stats_idx++] = (struct stats) {
1236                                .stat_name = cpu_to_be32(TX_FRAMES_SENT),
1237                                .value = cpu_to_be64(tx_frames),
1238                                .queue_id = cpu_to_be32(idx),
1239                        };
1240                        stats[stats_idx++] = (struct stats) {
1241                                .stat_name = cpu_to_be32(TX_BYTES_SENT),
1242                                .value = cpu_to_be64(tx_bytes),
1243                                .queue_id = cpu_to_be32(idx),
1244                        };
1245                        stats[stats_idx++] = (struct stats) {
1246                                .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1247                                .value = cpu_to_be64(last_completion),
1248                                .queue_id = cpu_to_be32(idx),
1249                        };
1250                }
1251        }
1252        /* rx stats */
1253        if (priv->rx) {
1254                for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1255                        stats[stats_idx++] = (struct stats) {
1256                                .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1257                                .value = cpu_to_be64(priv->rx[idx].desc.seqno),
1258                                .queue_id = cpu_to_be32(idx),
1259                        };
1260                        stats[stats_idx++] = (struct stats) {
1261                                .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
1262                                .value = cpu_to_be64(priv->rx[idx].fill_cnt),
1263                                .queue_id = cpu_to_be32(idx),
1264                        };
1265                }
1266        }
1267}
1268
1269static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1270{
1271        if (!gve_get_napi_enabled(priv))
1272                return;
1273
1274        if (link_status == netif_carrier_ok(priv->dev))
1275                return;
1276
1277        if (link_status) {
1278                netdev_info(priv->dev, "Device link is up.\n");
1279                netif_carrier_on(priv->dev);
1280        } else {
1281                netdev_info(priv->dev, "Device link is down.\n");
1282                netif_carrier_off(priv->dev);
1283        }
1284}
1285
1286/* Handle NIC status register changes, reset requests and report stats */
1287static void gve_service_task(struct work_struct *work)
1288{
1289        struct gve_priv *priv = container_of(work, struct gve_priv,
1290                                             service_task);
1291        u32 status = ioread32be(&priv->reg_bar0->device_status);
1292
1293        gve_handle_status(priv, status);
1294
1295        gve_handle_reset(priv);
1296        gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1297}
1298
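/* Set up the admin queue, learn the device configuration and allocate device
 * resources. Describing the device is skipped when reinitializing after a
 * reset.
 */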
1299static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
1300{
1301        int num_ntfy;
1302        int err;
1303
1304        /* Set up the adminq */
1305        err = gve_adminq_alloc(&priv->pdev->dev, priv);
1306        if (err) {
1307                dev_err(&priv->pdev->dev,
1308                        "Failed to alloc admin queue: err=%d\n", err);
1309                return err;
1310        }
1311
1312        if (skip_describe_device)
1313                goto setup_device;
1314
1315        priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
1316        /* Get the initial information we need from the device */
1317        err = gve_adminq_describe_device(priv);
1318        if (err) {
1319                dev_err(&priv->pdev->dev,
1320                        "Could not get device information: err=%d\n", err);
1321                goto err;
1322        }
1323        if (gve_is_gqi(priv) && priv->dev->max_mtu > PAGE_SIZE) {
1324                priv->dev->max_mtu = PAGE_SIZE;
1325                err = gve_adminq_set_mtu(priv, priv->dev->mtu);
1326                if (err) {
1327                        dev_err(&priv->pdev->dev, "Could not set mtu\n");
1328                        goto err;
1329                }
1330        }
1331        priv->dev->mtu = priv->dev->max_mtu;
1332        num_ntfy = pci_msix_vec_count(priv->pdev);
1333        if (num_ntfy <= 0) {
1334                dev_err(&priv->pdev->dev,
1335                        "could not count MSI-x vectors: err=%d\n", num_ntfy);
1336                err = num_ntfy;
1337                goto err;
1338        } else if (num_ntfy < GVE_MIN_MSIX) {
1339                dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
1340                        GVE_MIN_MSIX, num_ntfy);
1341                err = -EINVAL;
1342                goto err;
1343        }
1344
1345        priv->num_registered_pages = 0;
1346        priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
1347        /* gvnic has one Notification Block per MSI-x vector, except for the
1348         * management vector
1349         */
1350        priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
1351        priv->mgmt_msix_idx = priv->num_ntfy_blks;
1352
1353        priv->tx_cfg.max_queues =
1354                min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
1355        priv->rx_cfg.max_queues =
1356                min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
1357
1358        priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
1359        priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
1360        if (priv->default_num_queues > 0) {
1361                priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
1362                                                priv->tx_cfg.num_queues);
1363                priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
1364                                                priv->rx_cfg.num_queues);
1365        }
1366
1367        dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
1368                 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
1369        dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
1370                 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
1371
1372setup_device:
1373        err = gve_setup_device_resources(priv);
1374        if (!err)
1375                return 0;
1376err:
1377        gve_adminq_free(&priv->pdev->dev, priv);
1378        return err;
1379}
1380
1381static void gve_teardown_priv_resources(struct gve_priv *priv)
1382{
1383        gve_teardown_device_resources(priv);
1384        gve_adminq_free(&priv->pdev->dev, priv);
1385}
1386
1387static void gve_trigger_reset(struct gve_priv *priv)
1388{
1389        /* Reset the device by releasing the AQ */
1390        gve_adminq_release(priv);
1391}
1392
1393static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
1394{
1395        gve_trigger_reset(priv);
1396        /* With the reset having already happened, close cannot fail */
1397        if (was_up)
1398                gve_close(priv->dev);
1399        gve_teardown_priv_resources(priv);
1400}
1401
1402static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
1403{
1404        int err;
1405
1406        err = gve_init_priv(priv, true);
1407        if (err)
1408                goto err;
1409        if (was_up) {
1410                err = gve_open(priv->dev);
1411                if (err)
1412                        goto err;
1413        }
1414        return 0;
1415err:
1416        dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
1417        gve_turndown(priv);
1418        return err;
1419}
1420
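/* Tear the device down, optionally attempting a clean close first, then bring
 * it back up in its previous state.
 */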
1421int gve_reset(struct gve_priv *priv, bool attempt_teardown)
1422{
1423        bool was_up = netif_carrier_ok(priv->dev);
1424        int err;
1425
1426        dev_info(&priv->pdev->dev, "Performing reset\n");
1427        gve_clear_do_reset(priv);
1428        gve_set_reset_in_progress(priv);
1429        /* If we aren't attempting to teardown normally, just go turndown and
1430         * reset right away.
1431         */
1432        if (!attempt_teardown) {
1433                gve_turndown(priv);
1434                gve_reset_and_teardown(priv, was_up);
1435        } else {
1436                /* Otherwise attempt to close normally */
1437                if (was_up) {
1438                        err = gve_close(priv->dev);
1439                        /* If that fails reset as we did above */
1440                        if (err)
1441                                gve_reset_and_teardown(priv, was_up);
1442                }
1443                /* Clean up any remaining resources */
1444                gve_teardown_priv_resources(priv);
1445        }
1446
1447        /* Set it all back up */
1448        err = gve_reset_recovery(priv, was_up);
1449        gve_clear_reset_in_progress(priv);
1450        priv->reset_cnt++;
1451        priv->interface_up_cnt = 0;
1452        priv->interface_down_cnt = 0;
1453        priv->stats_report_trigger_cnt = 0;
1454        return err;
1455}
1456
1457static void gve_write_version(u8 __iomem *driver_version_register)
1458{
1459        const char *c = gve_version_prefix;
1460
1461        while (*c) {
1462                writeb(*c, driver_version_register);
1463                c++;
1464        }
1465
1466        c = gve_version_str;
1467        while (*c) {
1468                writeb(*c, driver_version_register);
1469                c++;
1470        }
1471        writeb('\n', driver_version_register);
1472}
1473
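/* PCI probe: map the register and doorbell BARs, allocate the netdev,
 * initialize driver state and register the interface.
 */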
1474static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1475{
1476        int max_tx_queues, max_rx_queues;
1477        struct net_device *dev;
1478        __be32 __iomem *db_bar;
1479        struct gve_registers __iomem *reg_bar;
1480        struct gve_priv *priv;
1481        int err;
1482
1483        err = pci_enable_device(pdev);
1484        if (err)
1485                return err;
1486
1487        err = pci_request_regions(pdev, "gvnic-cfg");
1488        if (err)
1489                goto abort_with_enabled;
1490
1491        pci_set_master(pdev);
1492
1493        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
1494        if (err) {
1495                dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
1496                goto abort_with_pci_region;
1497        }
1498
1499        reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
1500        if (!reg_bar) {
1501                dev_err(&pdev->dev, "Failed to map pci bar!\n");
1502                err = -ENOMEM;
1503                goto abort_with_pci_region;
1504        }
1505
1506        db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
1507        if (!db_bar) {
1508                dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
1509                err = -ENOMEM;
1510                goto abort_with_reg_bar;
1511        }
1512
1513        gve_write_version(&reg_bar->driver_version);
1514        /* Get max queues to alloc etherdev */
1515        max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
1516        max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
1517        /* Alloc and setup the netdev and priv */
1518        dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
1519        if (!dev) {
1520                dev_err(&pdev->dev, "could not allocate netdev\n");
1521                err = -ENOMEM;
1522                goto abort_with_db_bar;
1523        }
1524        SET_NETDEV_DEV(dev, &pdev->dev);
1525        pci_set_drvdata(pdev, dev);
1526        dev->ethtool_ops = &gve_ethtool_ops;
1527        dev->netdev_ops = &gve_netdev_ops;
1528
1529        /* Set default and supported features.
1530         *
1531         * Features might be set in other locations as well (such as
1532         * `gve_adminq_describe_device`).
1533         */
1534        dev->hw_features = NETIF_F_HIGHDMA;
1535        dev->hw_features |= NETIF_F_SG;
1536        dev->hw_features |= NETIF_F_HW_CSUM;
1537        dev->hw_features |= NETIF_F_TSO;
1538        dev->hw_features |= NETIF_F_TSO6;
1539        dev->hw_features |= NETIF_F_TSO_ECN;
1540        dev->hw_features |= NETIF_F_RXCSUM;
1541        dev->hw_features |= NETIF_F_RXHASH;
1542        dev->features = dev->hw_features;
1543        dev->watchdog_timeo = 5 * HZ;
1544        dev->min_mtu = ETH_MIN_MTU;
1545        netif_carrier_off(dev);
1546
1547        priv = netdev_priv(dev);
1548        priv->dev = dev;
1549        priv->pdev = pdev;
1550        priv->msg_enable = DEFAULT_MSG_LEVEL;
1551        priv->reg_bar0 = reg_bar;
1552        priv->db_bar2 = db_bar;
1553        priv->service_task_flags = 0x0;
1554        priv->state_flags = 0x0;
1555        priv->ethtool_flags = 0x0;
1556
1557        gve_set_probe_in_progress(priv);
1558        priv->gve_wq = alloc_ordered_workqueue("gve", 0);
1559        if (!priv->gve_wq) {
1560                dev_err(&pdev->dev, "Could not allocate workqueue\n");
1561                err = -ENOMEM;
1562                goto abort_with_netdev;
1563        }
1564        INIT_WORK(&priv->service_task, gve_service_task);
1565        INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
1566        priv->tx_cfg.max_queues = max_tx_queues;
1567        priv->rx_cfg.max_queues = max_rx_queues;
1568
1569        err = gve_init_priv(priv, false);
1570        if (err)
1571                goto abort_with_wq;
1572
1573        err = register_netdev(dev);
1574        if (err)
1575                goto abort_with_gve_init;
1576
1577        dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
1578        dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
1579        gve_clear_probe_in_progress(priv);
1580        queue_work(priv->gve_wq, &priv->service_task);
1581        return 0;
1582
1583abort_with_gve_init:
1584        gve_teardown_priv_resources(priv);
1585
1586abort_with_wq:
1587        destroy_workqueue(priv->gve_wq);
1588
1589abort_with_netdev:
1590        free_netdev(dev);
1591
1592abort_with_db_bar:
1593        pci_iounmap(pdev, db_bar);
1594
1595abort_with_reg_bar:
1596        pci_iounmap(pdev, reg_bar);
1597
1598abort_with_pci_region:
1599        pci_release_regions(pdev);
1600
1601abort_with_enabled:
1602        pci_disable_device(pdev);
1603        return err;
1604}
1605
1606static void gve_remove(struct pci_dev *pdev)
1607{
1608        struct net_device *netdev = pci_get_drvdata(pdev);
1609        struct gve_priv *priv = netdev_priv(netdev);
1610        __be32 __iomem *db_bar = priv->db_bar2;
1611        void __iomem *reg_bar = priv->reg_bar0;
1612
1613        unregister_netdev(netdev);
1614        gve_teardown_priv_resources(priv);
1615        destroy_workqueue(priv->gve_wq);
1616        free_netdev(netdev);
1617        pci_iounmap(pdev, db_bar);
1618        pci_iounmap(pdev, reg_bar);
1619        pci_release_regions(pdev);
1620        pci_disable_device(pdev);
1621}
1622
1623static const struct pci_device_id gve_id_table[] = {
1624        { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
1625        { }
1626};
1627
1628static struct pci_driver gvnic_driver = {
1629        .name           = "gvnic",
1630        .id_table       = gve_id_table,
1631        .probe          = gve_probe,
1632        .remove         = gve_remove,
1633};
1634
1635module_pci_driver(gvnic_driver);
1636
1637MODULE_DEVICE_TABLE(pci, gve_id_table);
1638MODULE_AUTHOR("Google, Inc.");
1639MODULE_DESCRIPTION("gVNIC Driver");
1640MODULE_LICENSE("Dual MIT/GPL");
1641MODULE_VERSION(GVE_VERSION);
1642