linux/drivers/net/ethernet/google/gve/gve_rx.c
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>

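/* Release a posted buffer: drop the reference-count bias this driver still
 * holds on the page, then unmap and free it.
 */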
static void gve_rx_free_buffer(struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
                                      GVE_DATA_SLOT_ADDR_PAGE_MASK);

        page_ref_sub(page_info->page, page_info->pagecnt_bias - 1);
        gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

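/* Return every posted buffer in the ring. In raw-addressing (RDA) mode each
 * buffer is unmapped and freed; in QPL mode only the bias references are
 * dropped and the queue page list is handed back via gve_unassign_qpl().
 */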
static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        u32 slots = rx->mask + 1;
        int i;

        if (rx->data.raw_addressing) {
                for (i = 0; i < slots; i++)
                        gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
                                           &rx->data.data_ring[i]);
        } else {
                for (i = 0; i < slots; i++)
                        page_ref_sub(rx->data.page_info[i].page,
                                     rx->data.page_info[i].pagecnt_bias - 1);
                gve_unassign_qpl(priv, rx->data.qpl->id);
                rx->data.qpl = NULL;
        }
        kvfree(rx->data.page_info);
        rx->data.page_info = NULL;
}

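/* Tear down one rx ring: descriptor ring, queue resources, posted buffers
 * and the data (buffer address) ring.
 */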
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *dev = &priv->pdev->dev;
        u32 slots = rx->mask + 1;
        size_t bytes;

        gve_rx_remove_from_block(priv, idx);

        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
        rx->desc.desc_ring = NULL;

        dma_free_coherent(dev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;

        gve_rx_unfill_pages(priv, rx);

        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(dev, bytes, rx->data.data_ring,
                          rx->data.data_bus);
        rx->data.data_ring = NULL;
        netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

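/* Point a ring slot at @page and take a large up-front reference-count bias
 * so that later recycling checks only need to compare page_count() against
 * page_info->pagecnt_bias instead of taking new references per packet.
 */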
static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
                                dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
        page_info->page = page;
        page_info->page_offset = 0;
        page_info->page_address = page_address(page);
        *slot_addr = cpu_to_be64(addr);
        /* The page already has 1 ref */
        page_ref_add(page, INT_MAX - 1);
        page_info->pagecnt_bias = INT_MAX;
}

static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        struct page *page;
        dma_addr_t dma;
        int err;

        err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
                             GFP_ATOMIC);
        if (err)
                return err;

        gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
        return 0;
}

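/* Post an initial buffer to every slot in the ring. Returns the number of
 * slots filled on success or a negative errno on failure.
 */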
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
        struct gve_priv *priv = rx->gve;
        u32 slots;
        int err;
        int i;

        /* Allocate one page per Rx queue slot. Each page is split into two
         * packet buffers, and when possible we "page flip" between the two.
         */
        slots = rx->mask + 1;

        rx->data.page_info = kvzalloc(slots *
                                      sizeof(*rx->data.page_info), GFP_KERNEL);
        if (!rx->data.page_info)
                return -ENOMEM;

        if (!rx->data.raw_addressing) {
                rx->data.qpl = gve_assign_rx_qpl(priv);
                if (!rx->data.qpl) {
                        kvfree(rx->data.page_info);
                        rx->data.page_info = NULL;
                        return -ENOMEM;
                }
        }
        for (i = 0; i < slots; i++) {
                if (!rx->data.raw_addressing) {
                        struct page *page = rx->data.qpl->pages[i];
                        dma_addr_t addr = i * PAGE_SIZE;

                        gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
                                            &rx->data.data_ring[i].qpl_offset);
                        continue;
                }
                err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
                                          &rx->data.data_ring[i]);
                if (err)
                        goto alloc_err;
        }

        return slots;
alloc_err:
        while (i--)
                gve_rx_free_buffer(&priv->pdev->dev,
                                   &rx->data.page_info[i],
                                   &rx->data.data_ring[i]);
        return err;
}

static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx)
{
        ctx->curr_frag_cnt = 0;
        ctx->total_expected_size = 0;
        ctx->expected_frag_cnt = 0;
        ctx->skb_head = NULL;
        ctx->skb_tail = NULL;
        ctx->reuse_frags = false;
}

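/* Allocate and initialize one rx ring: data (buffer) ring, initial buffers,
 * queue resources and descriptor ring, then register it with its NAPI block.
 */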
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *hdev = &priv->pdev->dev;
        u32 slots, npages;
        int filled_pages;
        size_t bytes;
        int err;

        netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
        /* Make sure everything is zeroed to start with */
        memset(rx, 0, sizeof(*rx));

        rx->gve = priv;
        rx->q_num = idx;

        slots = priv->rx_data_slot_cnt;
        rx->mask = slots - 1;
        rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

        /* alloc rx data ring */
        bytes = sizeof(*rx->data.data_ring) * slots;
        rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
                                                &rx->data.data_bus,
                                                GFP_KERNEL);
        if (!rx->data.data_ring)
                return -ENOMEM;
        filled_pages = gve_prefill_rx_pages(rx);
        if (filled_pages < 0) {
                err = -ENOMEM;
                goto abort_with_slots;
        }
        rx->fill_cnt = filled_pages;
        /* Ensure data ring slots (packet buffers) are visible. */
        dma_wmb();

        /* Alloc gve_queue_resources */
        rx->q_resources =
                dma_alloc_coherent(hdev,
                                   sizeof(*rx->q_resources),
                                   &rx->q_resources_bus,
                                   GFP_KERNEL);
        if (!rx->q_resources) {
                err = -ENOMEM;
                goto abort_filled;
        }
        netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
                  (unsigned long)rx->data.data_bus);

        /* alloc rx desc ring */
        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        npages = bytes / PAGE_SIZE;
        if (npages * PAGE_SIZE != bytes) {
                err = -EIO;
                goto abort_with_q_resources;
        }

        rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
                                                GFP_KERNEL);
        if (!rx->desc.desc_ring) {
                err = -ENOMEM;
                goto abort_with_q_resources;
        }
        rx->cnt = 0;
        rx->db_threshold = priv->rx_desc_cnt / 2;
        rx->desc.seqno = 1;

        /* Allocating half-page buffers allows page-flipping which is faster
         * than copying or allocating new pages.
         */
        rx->packet_buffer_size = PAGE_SIZE / 2;
        gve_rx_ctx_clear(&rx->ctx);
        gve_rx_add_to_block(priv, idx);

        return 0;

abort_with_q_resources:
        dma_free_coherent(hdev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;
abort_filled:
        gve_rx_unfill_pages(priv, rx);
abort_with_slots:
        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
        rx->data.data_ring = NULL;

        return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
        int err = 0;
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
                err = gve_rx_alloc_ring(priv, i);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to alloc rx ring=%d: err=%d\n",
                                  i, err);
                        break;
                }
        }
        /* Free any already-allocated rings if there was an error */
        if (err) {
                int j;

                for (j = 0; j < i; j++)
                        gve_rx_free_ring(priv, j);
        }
        return err;
}

void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++)
                gve_rx_free_ring(priv, i);
}

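/* Ring the queue's doorbell with the current fill count so the device knows
 * how many buffers have been posted.
 */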
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

        iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
        if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
                return PKT_HASH_TYPE_L4;
        if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
                return PKT_HASH_TYPE_L3;
        return PKT_HASH_TYPE_L2;
}

static u16 gve_rx_ctx_padding(struct gve_rx_ctx *ctx)
{
        return (ctx->curr_frag_cnt == 0) ? GVE_RX_PAD : 0;
}

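/* Attach one received fragment to the frag-list skb held in @ctx, allocating
 * the skb from napi_get_frags() for the first fragment of a packet.
 */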
static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
                                        struct gve_rx_slot_page_info *page_info,
                                        u16 packet_buffer_size, u16 len,
                                        struct gve_rx_ctx *ctx)
{
        u32 offset = page_info->page_offset + gve_rx_ctx_padding(ctx);
        struct sk_buff *skb;

        if (!ctx->skb_head)
                ctx->skb_head = napi_get_frags(napi);

        if (unlikely(!ctx->skb_head))
                return NULL;

        skb = ctx->skb_head;
        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page_info->page,
                        offset, len, packet_buffer_size);

        return skb;
}

static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
        const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

        /* "flip" to other packet buffer on this page */
        page_info->page_offset ^= PAGE_SIZE / 2;
        *(slot_addr) ^= offset;
}

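/* Returns 1 if the page can be recycled (no outstanding SKB references
 * beyond our bias), 0 if it is still in use, or -1 on an impossible
 * refcount state.
 */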
static int gve_rx_can_recycle_buffer(struct gve_rx_slot_page_info *page_info)
{
        int pagecount = page_count(page_info->page);

        /* This page is not being used by any SKBs - reuse */
        if (pagecount == page_info->pagecnt_bias)
                return 1;
        /* This page is still being used by an SKB - we can't reuse */
        else if (pagecount > page_info->pagecnt_bias)
                return 0;
        WARN(pagecount < page_info->pagecnt_bias,
             "Pagecount should never be less than the bias.");
        return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
                      struct gve_rx_slot_page_info *page_info, u16 len,
                      struct napi_struct *napi,
                      union gve_rx_data_slot *data_slot,
                      u16 packet_buffer_size, struct gve_rx_ctx *ctx)
{
        struct sk_buff *skb = gve_rx_add_frags(napi, page_info, packet_buffer_size, len, ctx);

        if (!skb)
                return NULL;

        /* Optimistically stop the kernel from freeing the page.
         * We will check again in refill to determine if we need to alloc a
         * new page.
         */
        gve_dec_pagecnt_bias(page_info);

        return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
           struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
           u16 len, struct napi_struct *napi,
           union gve_rx_data_slot *data_slot)
{
        struct gve_rx_ctx *ctx = &rx->ctx;
        struct sk_buff *skb;

        /* If raw_addressing mode is not enabled, gvnic can only receive into
         * registered segments. If the buffer can't be recycled, our only
         * choice is to copy the data out of it so that we can return it to the
         * device.
         */
        if (ctx->reuse_frags) {
                skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx);
                /* No point in recycling if we didn't get the skb */
                if (skb) {
                        /* Make sure that the page isn't freed. */
                        gve_dec_pagecnt_bias(page_info);
                        gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
                }
        } else {
                const u16 padding = gve_rx_ctx_padding(ctx);

                skb = gve_rx_copy(netdev, napi, page_info, len, padding, ctx);
                if (skb) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_frag_copy_cnt++;
                        u64_stats_update_end(&rx->statss);
                }
        }
        return skb;
}

#define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x))
static u16 gve_rx_get_fragment_size(struct gve_rx_ctx *ctx, struct gve_rx_desc *desc)
{
        return be16_to_cpu(desc->len) - gve_rx_ctx_padding(ctx);
}

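/* Walk every descriptor of the next packet (which may span several
 * fragments) to validate sequence numbers and fragment sizes, and decide
 * whether the QPL buffers involved can all be flipped. Returns false and
 * drops the packet (possibly scheduling a reset) on any error.
 */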
static bool gve_rx_ctx_init(struct gve_rx_ctx *ctx, struct gve_rx_ring *rx)
{
        bool qpl_mode = !rx->data.raw_addressing, packet_size_error = false;
        bool buffer_error = false, desc_error = false, seqno_error = false;
        struct gve_rx_slot_page_info *page_info;
        struct gve_priv *priv = rx->gve;
        u32 idx = rx->cnt & rx->mask;
        bool reuse_frags, can_flip;
        struct gve_rx_desc *desc;
        u16 packet_size = 0;
        u16 n_frags = 0;
        int recycle;

        /* In QPL mode, we only flip buffers when all buffers containing the
         * packet can be flipped. RDA can_flip decisions will be made later,
         * per frag.
         */
        can_flip = qpl_mode;
        reuse_frags = can_flip;
        do {
                u16 frag_size;

                n_frags++;
                desc = &rx->desc.desc_ring[idx];
                desc_error = unlikely(desc->flags_seq & GVE_RXF_ERR) || desc_error;
                if (GVE_SEQNO(desc->flags_seq) != rx->desc.seqno) {
                        seqno_error = true;
                        netdev_warn(priv->dev,
                                    "RX seqno error: want=%d, got=%d, dropping packet and scheduling reset.",
                                    rx->desc.seqno, GVE_SEQNO(desc->flags_seq));
                }
                frag_size = be16_to_cpu(desc->len);
                packet_size += frag_size;
                if (frag_size > rx->packet_buffer_size) {
                        packet_size_error = true;
                        netdev_warn(priv->dev,
                                    "RX fragment error: packet_buffer_size=%d, frag_size=%d, dropping packet.",
                                    rx->packet_buffer_size, be16_to_cpu(desc->len));
                }
                page_info = &rx->data.page_info[idx];
                if (can_flip) {
                        recycle = gve_rx_can_recycle_buffer(page_info);
                        reuse_frags = reuse_frags && recycle > 0;
                        buffer_error = buffer_error || unlikely(recycle < 0);
                }
                idx = (idx + 1) & rx->mask;
                rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
        } while (GVE_PKTCONT_BIT_IS_SET(desc->flags_seq));

        prefetch(rx->desc.desc_ring + idx);

        ctx->curr_frag_cnt = 0;
        ctx->total_expected_size = packet_size - GVE_RX_PAD;
        ctx->expected_frag_cnt = n_frags;
        ctx->skb_head = NULL;
        ctx->reuse_frags = reuse_frags;

        if (ctx->expected_frag_cnt > 1) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_cont_packet_cnt++;
                u64_stats_update_end(&rx->statss);
        }
        if (ctx->total_expected_size > priv->rx_copybreak && !ctx->reuse_frags && qpl_mode) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_copied_pkt++;
                u64_stats_update_end(&rx->statss);
        }

        if (unlikely(buffer_error || seqno_error || packet_size_error)) {
                gve_schedule_reset(priv);
                return false;
        }

        if (unlikely(desc_error)) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_desc_err_dropped_pkt++;
                u64_stats_update_end(&rx->statss);
                return false;
        }
        return true;
}

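/* Build an skb for one fragment: copy small single-fragment packets,
 * otherwise hand the page to the stack via the raw-addressing or QPL path.
 */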
static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
                                  struct gve_rx_slot_page_info *page_info, struct napi_struct *napi,
                                  u16 len, union gve_rx_data_slot *data_slot)
{
        struct net_device *netdev = priv->dev;
        struct gve_rx_ctx *ctx = &rx->ctx;
        struct sk_buff *skb = NULL;

        if (len <= priv->rx_copybreak && ctx->expected_frag_cnt == 1) {
                /* Just copy small packets */
                skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD, ctx);
                if (skb) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_copied_pkt++;
                        rx->rx_frag_copy_cnt++;
                        rx->rx_copybreak_pkt++;
                        u64_stats_update_end(&rx->statss);
                }
        } else {
                if (rx->data.raw_addressing) {
                        int recycle = gve_rx_can_recycle_buffer(page_info);

                        if (unlikely(recycle < 0)) {
                                gve_schedule_reset(priv);
                                return NULL;
                        }
                        page_info->can_flip = recycle;
                        if (page_info->can_flip) {
                                u64_stats_update_begin(&rx->statss);
                                rx->rx_frag_flip_cnt++;
                                u64_stats_update_end(&rx->statss);
                        }
                        skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev,
                                                    page_info, len, napi,
                                                    data_slot,
                                                    rx->packet_buffer_size, ctx);
                } else {
                        if (ctx->reuse_frags) {
                                u64_stats_update_begin(&rx->statss);
                                rx->rx_frag_flip_cnt++;
                                u64_stats_update_end(&rx->statss);
                        }
                        skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx,
                                         page_info, len, napi, data_slot);
                }
        }
        return skb;
}

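/* Process one packet (all of its fragments) starting at rx->cnt. Returns
 * true if an skb was passed up the stack, false if the packet was dropped;
 * *work_done is set to the number of descriptors consumed.
 */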
static bool gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
                   u64 *packet_size_bytes, u32 *work_done)
{
        struct gve_rx_slot_page_info *page_info;
        struct gve_rx_ctx *ctx = &rx->ctx;
        union gve_rx_data_slot *data_slot;
        struct gve_priv *priv = rx->gve;
        struct gve_rx_desc *first_desc;
        struct sk_buff *skb = NULL;
        struct gve_rx_desc *desc;
        struct napi_struct *napi;
        dma_addr_t page_bus;
        u32 work_cnt = 0;
        void *va;
        u32 idx;
        u16 len;

        idx = rx->cnt & rx->mask;
        first_desc = &rx->desc.desc_ring[idx];
        desc = first_desc;
        napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

        if (unlikely(!gve_rx_ctx_init(ctx, rx)))
                goto skb_alloc_fail;

        while (ctx->curr_frag_cnt < ctx->expected_frag_cnt) {
                /* Prefetch two packet buffers ahead; we will need them soon. */
                page_info = &rx->data.page_info[(idx + 2) & rx->mask];
                va = page_info->page_address + page_info->page_offset;

                prefetch(page_info->page); /* Kernel page struct. */
                prefetch(va);              /* Packet header. */
                prefetch(va + 64);         /* Next cacheline too. */

                len = gve_rx_get_fragment_size(ctx, desc);

                page_info = &rx->data.page_info[idx];
                data_slot = &rx->data.data_ring[idx];
                page_bus = rx->data.raw_addressing ?
                           be64_to_cpu(data_slot->addr) - page_info->page_offset :
                           rx->data.qpl->page_buses[idx];
                dma_sync_single_for_cpu(&priv->pdev->dev, page_bus, PAGE_SIZE, DMA_FROM_DEVICE);

                skb = gve_rx_skb(priv, rx, page_info, napi, len, data_slot);
                if (!skb) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_skb_alloc_fail++;
                        u64_stats_update_end(&rx->statss);
                        goto skb_alloc_fail;
                }

                ctx->curr_frag_cnt++;
                rx->cnt++;
                idx = rx->cnt & rx->mask;
                work_cnt++;
                desc = &rx->desc.desc_ring[idx];
        }

        if (likely(feat & NETIF_F_RXCSUM)) {
                /* NIC passes up the partial sum */
                if (first_desc->csum)
                        skb->ip_summed = CHECKSUM_COMPLETE;
                else
                        skb->ip_summed = CHECKSUM_NONE;
                skb->csum = csum_unfold(first_desc->csum);
        }

        /* parse flags & pass relevant info up */
        if (likely(feat & NETIF_F_RXHASH) &&
            gve_needs_rss(first_desc->flags_seq))
                skb_set_hash(skb, be32_to_cpu(first_desc->rss_hash),
                             gve_rss_type(first_desc->flags_seq));

        *packet_size_bytes = skb->len + (skb->protocol ? ETH_HLEN : 0);
        *work_done = work_cnt;
        skb_record_rx_queue(skb, rx->q_num);
        if (skb_is_nonlinear(skb))
                napi_gro_frags(napi);
        else
                napi_gro_receive(napi, skb);

        gve_rx_ctx_clear(ctx);
        return true;

skb_alloc_fail:
        if (napi->skb)
                napi_free_frags(napi);
        *packet_size_bytes = 0;
        *work_done = ctx->expected_frag_cnt;
        while (ctx->curr_frag_cnt < ctx->expected_frag_cnt) {
                rx->cnt++;
                ctx->curr_frag_cnt++;
        }
        gve_rx_ctx_clear(ctx);
        return false;
}

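/* A descriptor belongs to the driver once its sequence number matches the
 * ring's expected seqno.
 */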
bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
        struct gve_rx_desc *desc;
        __be16 flags_seq;
        u32 next_idx;

        next_idx = rx->cnt & rx->mask;
        desc = rx->desc.desc_ring + next_idx;

        flags_seq = desc->flags_seq;

        return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

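/* Repost buffers until the ring is full again, flipping to the free half of
 * a page when possible and allocating a fresh page only when the old one is
 * still owned by the stack.
 */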
static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        int refill_target = rx->mask + 1;
        u32 fill_cnt = rx->fill_cnt;

        while (fill_cnt - rx->cnt < refill_target) {
                struct gve_rx_slot_page_info *page_info;
                u32 idx = fill_cnt & rx->mask;

                page_info = &rx->data.page_info[idx];
                if (page_info->can_flip) {
                        /* The other half of the page is free because it was
                         * free when we processed the descriptor. Flip to it.
                         */
                        union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];

                        gve_rx_flip_buff(page_info, &data_slot->addr);
                        page_info->can_flip = 0;
                } else {
                        /* It is possible that the networking stack has already
                         * finished processing all outstanding packets in the buffer
                         * and it can be reused.
                         * Flipping is unnecessary here - if the networking stack still
                         * owns half the page it is impossible to tell which half. Either
                         * the whole page is free or it needs to be replaced.
                         */
                        int recycle = gve_rx_can_recycle_buffer(page_info);

                        if (recycle < 0) {
                                if (!rx->data.raw_addressing)
                                        gve_schedule_reset(priv);
                                return false;
                        }
                        if (!recycle) {
                                /* We can't reuse the buffer - alloc a new one */
                                union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];
                                struct device *dev = &priv->pdev->dev;

                                gve_rx_free_buffer(dev, page_info, data_slot);
                                page_info->page = NULL;
                                if (gve_rx_alloc_buffer(priv, dev, page_info,
                                                        data_slot)) {
                                        u64_stats_update_begin(&rx->statss);
                                        rx->rx_buf_alloc_fail++;
                                        u64_stats_update_end(&rx->statss);
                                        break;
                                }
                        }
                }
                fill_cnt++;
        }
        rx->fill_cnt = fill_cnt;
        return true;
}

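/* NAPI poll body: consume ready descriptors up to @budget, update stats,
 * restock buffers and ring the doorbell. Returns the number of packets
 * processed, or @budget if the queue should be polled again to finish
 * refilling.
 */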
static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
                             netdev_features_t feat)
{
        u32 work_done = 0, total_packet_cnt = 0, ok_packet_cnt = 0;
        struct gve_priv *priv = rx->gve;
        u32 idx = rx->cnt & rx->mask;
        struct gve_rx_desc *desc;
        u64 bytes = 0;

        desc = &rx->desc.desc_ring[idx];
        while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
               work_done < budget) {
                u64 packet_size_bytes = 0;
                u32 work_cnt = 0;
                bool dropped;

                netif_info(priv, rx_status, priv->dev,
                           "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
                           rx->q_num, idx, desc, desc->flags_seq);
                netif_info(priv, rx_status, priv->dev,
                           "[%d] seqno=%d rx->desc.seqno=%d\n",
                           rx->q_num, GVE_SEQNO(desc->flags_seq),
                           rx->desc.seqno);

                dropped = !gve_rx(rx, feat, &packet_size_bytes, &work_cnt);
                if (!dropped) {
                        bytes += packet_size_bytes;
                        ok_packet_cnt++;
                }
                total_packet_cnt++;
                idx = rx->cnt & rx->mask;
                desc = &rx->desc.desc_ring[idx];
                work_done += work_cnt;
        }

        if (!work_done && rx->fill_cnt - rx->cnt > rx->db_threshold)
                return 0;

        if (work_done) {
                u64_stats_update_begin(&rx->statss);
                rx->rpackets += ok_packet_cnt;
                rx->rbytes += bytes;
                u64_stats_update_end(&rx->statss);
        }

        /* restock ring slots */
        if (!rx->data.raw_addressing) {
                /* In QPL mode buffers are refilled as the descriptors are processed */
                rx->fill_cnt += work_done;
        } else if (rx->fill_cnt - rx->cnt <= rx->db_threshold) {
                /* In raw addressing mode buffers are only refilled if the
                 * available count falls below a threshold.
                 */
                if (!gve_rx_refill_buffers(priv, rx))
                        return 0;

                /* If we were not able to completely refill buffers, we'll want
                 * to schedule this queue for work again to refill buffers.
                 */
                if (rx->fill_cnt - rx->cnt <= rx->db_threshold) {
                        gve_rx_write_doorbell(priv, rx);
                        return budget;
                }
        }

        gve_rx_write_doorbell(priv, rx);
        return total_packet_cnt;
}

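/* Entry point from the NAPI handler; a budget of 0 means "do all the work". */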
int gve_rx_poll(struct gve_notify_block *block, int budget)
{
        struct gve_rx_ring *rx = block->rx;
        netdev_features_t feat;
        int work_done = 0;

        feat = block->napi.dev->features;

        /* If budget is 0, do all the work */
        if (budget == 0)
                budget = INT_MAX;

        if (budget > 0)
                work_done = gve_clean_rx_done(rx, budget, feat);

        return work_done;
}