linux/drivers/net/ethernet/google/gve/gve_rx.c
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>

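/* Unmap and free a single raw-DMA receive page. The DMA address is recovered
 * from the device-visible data slot, masked back to the page boundary in case
 * the slot currently points at the second half of the page.
 */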
static void gve_rx_free_buffer(struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
                                      GVE_DATA_SLOT_ADDR_PAGE_MASK);

        gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

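/* Release every posted receive buffer: free the pages individually in raw
 * addressing mode, or give the queue page list (QPL) back in QPL mode, then
 * drop the per-slot page_info tracking array.
 */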
static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        if (rx->data.raw_addressing) {
                u32 slots = rx->mask + 1;
                int i;

                for (i = 0; i < slots; i++)
                        gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
                                           &rx->data.data_ring[i]);
        } else {
                gve_unassign_qpl(priv, rx->data.qpl->id);
                rx->data.qpl = NULL;
        }
        kvfree(rx->data.page_info);
        rx->data.page_info = NULL;
}

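/* Tear down one rx ring: detach it from its notification block and free the
 * descriptor ring, queue resources, posted buffers and data slot ring.
 */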
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *dev = &priv->pdev->dev;
        u32 slots = rx->mask + 1;
        size_t bytes;

        gve_rx_remove_from_block(priv, idx);

        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
        rx->desc.desc_ring = NULL;

        dma_free_coherent(dev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;

        gve_rx_unfill_pages(priv, rx);

        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(dev, bytes, rx->data.data_ring,
                          rx->data.data_bus);
        rx->data.data_ring = NULL;
        netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
                             dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
        page_info->page = page;
        page_info->page_offset = 0;
        page_info->page_address = page_address(page);
        *slot_addr = cpu_to_be64(addr);
}

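/* Allocate and DMA-map one page for a raw addressing slot, recording it in
 * both the driver-side page_info and the device-visible data slot.
 */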
static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        struct page *page;
        dma_addr_t dma;
        int err;

        err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
        if (err)
                return err;

        gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
        return 0;
}

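/* Post a packet buffer to every slot of the data ring before the queue goes
 * live. Returns the number of slots filled or a negative errno.
 */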
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
        struct gve_priv *priv = rx->gve;
        u32 slots;
        int err;
        int i;

        /* Allocate one page per Rx queue slot. Each page is split into two
         * packet buffers, when possible we "page flip" between the two.
         */
        slots = rx->mask + 1;

        rx->data.page_info = kvzalloc(slots *
                                      sizeof(*rx->data.page_info), GFP_KERNEL);
        if (!rx->data.page_info)
                return -ENOMEM;

        if (!rx->data.raw_addressing) {
                rx->data.qpl = gve_assign_rx_qpl(priv);
                if (!rx->data.qpl) {
                        kvfree(rx->data.page_info);
                        rx->data.page_info = NULL;
                        return -ENOMEM;
                }
        }
        for (i = 0; i < slots; i++) {
                if (!rx->data.raw_addressing) {
                        struct page *page = rx->data.qpl->pages[i];
                        dma_addr_t addr = i * PAGE_SIZE;

                        gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
                                            &rx->data.data_ring[i].qpl_offset);
                        continue;
                }
                err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
                                          &rx->data.data_ring[i]);
                if (err)
                        goto alloc_err;
        }

        return slots;
alloc_err:
        while (i--)
                gve_rx_free_buffer(&priv->pdev->dev,
                                   &rx->data.page_info[i],
                                   &rx->data.data_ring[i]);
        return err;
}

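/* Allocate the data slot ring, packet buffers, queue resources and descriptor
 * ring for rx queue @idx, then attach the ring to its notification block.
 */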
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *hdev = &priv->pdev->dev;
        u32 slots, npages;
        int filled_pages;
        size_t bytes;
        int err;

        netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
        /* Make sure everything is zeroed to start with */
        memset(rx, 0, sizeof(*rx));

        rx->gve = priv;
        rx->q_num = idx;

        slots = priv->rx_data_slot_cnt;
        rx->mask = slots - 1;
        rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

        /* alloc rx data ring */
        bytes = sizeof(*rx->data.data_ring) * slots;
        rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
                                                &rx->data.data_bus,
                                                GFP_KERNEL);
        if (!rx->data.data_ring)
                return -ENOMEM;
        filled_pages = gve_prefill_rx_pages(rx);
        if (filled_pages < 0) {
                err = -ENOMEM;
                goto abort_with_slots;
        }
        rx->fill_cnt = filled_pages;
        /* Ensure data ring slots (packet buffers) are visible. */
        dma_wmb();

        /* Alloc gve_queue_resources */
        rx->q_resources =
                dma_alloc_coherent(hdev,
                                   sizeof(*rx->q_resources),
                                   &rx->q_resources_bus,
                                   GFP_KERNEL);
        if (!rx->q_resources) {
                err = -ENOMEM;
                goto abort_filled;
        }
        netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
                  (unsigned long)rx->data.data_bus);

        /* alloc rx desc ring */
        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        npages = bytes / PAGE_SIZE;
        if (npages * PAGE_SIZE != bytes) {
                err = -EIO;
                goto abort_with_q_resources;
        }

        rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
                                                GFP_KERNEL);
        if (!rx->desc.desc_ring) {
                err = -ENOMEM;
                goto abort_with_q_resources;
        }
        rx->cnt = 0;
        rx->db_threshold = priv->rx_desc_cnt / 2;
        rx->desc.seqno = 1;
        gve_rx_add_to_block(priv, idx);

        return 0;

abort_with_q_resources:
        dma_free_coherent(hdev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;
abort_filled:
        gve_rx_unfill_pages(priv, rx);
abort_with_slots:
        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
        rx->data.data_ring = NULL;

        return err;
}

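/* Allocate all configured rx rings; on failure, free any rings that were
 * already allocated before returning the error.
 */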
int gve_rx_alloc_rings(struct gve_priv *priv)
{
        int err = 0;
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
                err = gve_rx_alloc_ring(priv, i);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to alloc rx ring=%d: err=%d\n",
                                  i, err);
                        break;
                }
        }
        /* Unallocate if there was an error */
        if (err) {
                int j;

                for (j = 0; j < i; j++)
                        gve_rx_free_ring(priv, j);
        }
        return err;
}

void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++)
                gve_rx_free_ring(priv, i);
}

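/* Tell the device how far the ring has been filled by writing fill_cnt to
 * this queue's doorbell register.
 */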
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

        iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
        if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
                return PKT_HASH_TYPE_L4;
        if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
                return PKT_HASH_TYPE_L3;
        return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
                                        struct gve_rx_slot_page_info *page_info,
                                        u16 len)
{
        struct sk_buff *skb = napi_get_frags(napi);

        if (unlikely(!skb))
                return NULL;

        skb_add_rx_frag(skb, 0, page_info->page,
                        page_info->page_offset +
                        GVE_RX_PAD, len, PAGE_SIZE / 2);

        return skb;
}

static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
        const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

        /* "flip" to other packet buffer on this page */
        page_info->page_offset ^= PAGE_SIZE / 2;
        *(slot_addr) ^= offset;
}

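/* Page flipping (treating each half of a page as an independent buffer) is
 * only attempted when a padded MTU-sized frame fits in half of a 4K page.
 */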
static bool gve_rx_can_flip_buffers(struct net_device *netdev)
{
        return PAGE_SIZE == 4096
                ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
}

static int gve_rx_can_recycle_buffer(struct page *page)
{
        int pagecount = page_count(page);

        /* This page is not being used by any SKBs - reuse */
        if (pagecount == 1)
                return 1;
        /* This page is still being used by an SKB - we can't reuse */
        else if (pagecount >= 2)
                return 0;
        WARN(pagecount < 1, "Pagecount should never be < 1");
        return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
                      struct gve_rx_slot_page_info *page_info, u16 len,
                      struct napi_struct *napi,
                      union gve_rx_data_slot *data_slot)
{
        struct sk_buff *skb;

        skb = gve_rx_add_frags(napi, page_info, len);
        if (!skb)
                return NULL;

        /* Optimistically stop the kernel from freeing the page by increasing
         * the page bias. We will check the refcount in refill to determine if
         * we need to alloc a new page.
         */
        get_page(page_info->page);

        return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
           struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
           u16 len, struct napi_struct *napi,
           union gve_rx_data_slot *data_slot)
{
        struct sk_buff *skb;

        /* if raw_addressing mode is not enabled gvnic can only receive into
         * registered segments. If the buffer can't be recycled, our only
         * choice is to copy the data out of it so that we can return it to the
         * device.
         */
        if (page_info->can_flip) {
                skb = gve_rx_add_frags(napi, page_info, len);
                /* No point in recycling if we didn't get the skb */
                if (skb) {
                        /* Make sure that the page isn't freed. */
                        get_page(page_info->page);
                        gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
                }
        } else {
                skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
                if (skb) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_copied_pkt++;
                        u64_stats_update_end(&rx->statss);
                }
        }
        return skb;
}

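/* Handle one completed rx descriptor: build an skb by copying small packets
 * or attaching the buffer page as a frag, fill in checksum and RSS hash info,
 * and hand the skb to GRO. Returns false if the packet was dropped.
 */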
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
                   netdev_features_t feat, u32 idx)
{
        struct gve_rx_slot_page_info *page_info;
        struct gve_priv *priv = rx->gve;
        struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
        struct net_device *dev = priv->dev;
        union gve_rx_data_slot *data_slot;
        struct sk_buff *skb = NULL;
        dma_addr_t page_bus;
        u16 len;

        /* drop this packet */
        if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_desc_err_dropped_pkt++;
                u64_stats_update_end(&rx->statss);
                return false;
        }

        len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
        page_info = &rx->data.page_info[idx];

        data_slot = &rx->data.data_ring[idx];
        page_bus = (rx->data.raw_addressing) ?
                        be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
                        rx->data.qpl->page_buses[idx];
        dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
                                PAGE_SIZE, DMA_FROM_DEVICE);

        if (len <= priv->rx_copybreak) {
                /* Just copy small packets */
                skb = gve_rx_copy(dev, napi, page_info, len, GVE_RX_PAD);
                u64_stats_update_begin(&rx->statss);
                rx->rx_copied_pkt++;
                rx->rx_copybreak_pkt++;
                u64_stats_update_end(&rx->statss);
        } else {
                u8 can_flip = gve_rx_can_flip_buffers(dev);
                int recycle = 0;

                if (can_flip) {
                        recycle = gve_rx_can_recycle_buffer(page_info->page);
                        if (recycle < 0) {
                                if (!rx->data.raw_addressing)
                                        gve_schedule_reset(priv);
                                return false;
                        }
                }

                page_info->can_flip = can_flip && recycle;
                if (rx->data.raw_addressing) {
                        skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
                                                    page_info, len, napi,
                                                    data_slot);
                } else {
                        skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
                                         page_info, len, napi, data_slot);
                }
        }

        if (!skb) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_skb_alloc_fail++;
                u64_stats_update_end(&rx->statss);
                return false;
        }

        if (likely(feat & NETIF_F_RXCSUM)) {
                /* NIC passes up the partial sum */
                if (rx_desc->csum)
                        skb->ip_summed = CHECKSUM_COMPLETE;
                else
                        skb->ip_summed = CHECKSUM_NONE;
                skb->csum = csum_unfold(rx_desc->csum);
        }

        /* parse flags & pass relevant info up */
        if (likely(feat & NETIF_F_RXHASH) &&
            gve_needs_rss(rx_desc->flags_seq))
                skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
                             gve_rss_type(rx_desc->flags_seq));

        if (skb_is_nonlinear(skb))
                napi_gro_frags(napi);
        else
                napi_gro_receive(napi, skb);
        return true;
}

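/* Check whether the device has written the next descriptor by comparing its
 * sequence number against the one the driver expects.
 */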
static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
        struct gve_rx_desc *desc;
        __be16 flags_seq;
        u32 next_idx;

        next_idx = rx->cnt & rx->mask;
        desc = rx->desc.desc_ring + next_idx;

        flags_seq = desc->flags_seq;
        /* Make sure we have synchronized the seq no with the device */
        smp_rmb();

        return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

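/* Repost buffers until the ring is full again: flip to the free half of a
 * page, reuse a page the stack has released, or allocate a fresh page.
 * Returns false if a page refcount is in an unexpected state.
 */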
static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        int refill_target = rx->mask + 1;
        u32 fill_cnt = rx->fill_cnt;

        while (fill_cnt - rx->cnt < refill_target) {
                struct gve_rx_slot_page_info *page_info;
                u32 idx = fill_cnt & rx->mask;

                page_info = &rx->data.page_info[idx];
                if (page_info->can_flip) {
                        /* The other half of the page is free because it was
                         * free when we processed the descriptor. Flip to it.
                         */
                        union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];

                        gve_rx_flip_buff(page_info, &data_slot->addr);
                        page_info->can_flip = 0;
                } else {
                        /* It is possible that the networking stack has already
                         * finished processing all outstanding packets in the buffer
                         * and it can be reused.
                         * Flipping is unnecessary here - if the networking stack still
                         * owns half the page it is impossible to tell which half. Either
                         * the whole page is free or it needs to be replaced.
                         */
                        int recycle = gve_rx_can_recycle_buffer(page_info->page);

                        if (recycle < 0) {
                                if (!rx->data.raw_addressing)
                                        gve_schedule_reset(priv);
                                return false;
                        }
                        if (!recycle) {
                                /* We can't reuse the buffer - alloc a new one */
                                union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];
                                struct device *dev = &priv->pdev->dev;

                                gve_rx_free_buffer(dev, page_info, data_slot);
                                page_info->page = NULL;
                                if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot))
                                        break;
                        }
                }
                fill_cnt++;
        }
        rx->fill_cnt = fill_cnt;
        return true;
}

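/* Per-ring NAPI work: consume completed descriptors up to @budget, update
 * stats, restock buffers and ring the doorbell. Returns true if the ring
 * should be polled again.
 */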
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
                       netdev_features_t feat)
{
        struct gve_priv *priv = rx->gve;
        u32 work_done = 0, packets = 0;
        struct gve_rx_desc *desc;
        u32 cnt = rx->cnt;
        u32 idx = cnt & rx->mask;
        u64 bytes = 0;

        desc = rx->desc.desc_ring + idx;
        while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
               work_done < budget) {
                bool dropped;

                netif_info(priv, rx_status, priv->dev,
                           "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
                           rx->q_num, idx, desc, desc->flags_seq);
                netif_info(priv, rx_status, priv->dev,
                           "[%d] seqno=%d rx->desc.seqno=%d\n",
                           rx->q_num, GVE_SEQNO(desc->flags_seq),
                           rx->desc.seqno);
                dropped = !gve_rx(rx, desc, feat, idx);
                if (!dropped) {
                        bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
                        packets++;
                }
                cnt++;
                idx = cnt & rx->mask;
                desc = rx->desc.desc_ring + idx;
                rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
                work_done++;
        }

        if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
                return false;

        u64_stats_update_begin(&rx->statss);
        rx->rpackets += packets;
        rx->rbytes += bytes;
        u64_stats_update_end(&rx->statss);
        rx->cnt = cnt;

        /* restock ring slots */
        if (!rx->data.raw_addressing) {
                /* In QPL mode buffs are refilled as the desc are processed */
                rx->fill_cnt += work_done;
        } else if (rx->fill_cnt - cnt <= rx->db_threshold) {
                /* In raw addressing mode buffs are only refilled if the avail
                 * falls below a threshold.
                 */
                if (!gve_rx_refill_buffers(priv, rx))
                        return false;

                /* If we were not able to completely refill buffers, we'll want
                 * to schedule this queue for work again to refill buffers.
                 */
                if (rx->fill_cnt - cnt <= rx->db_threshold) {
                        gve_rx_write_doorbell(priv, rx);
                        return true;
                }
        }

        gve_rx_write_doorbell(priv, rx);
        return gve_rx_work_pending(rx);
}

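/* NAPI poll entry point for rx. A budget of 0 means "do all available work"
 * and is widened to INT_MAX. Returns whether the ring needs repolling.
 */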
bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
        struct gve_rx_ring *rx = block->rx;
        netdev_features_t feat;
        bool repoll = false;

        feat = block->napi.dev->features;

        /* If budget is 0, do all the work */
        if (budget == 0)
                budget = INT_MAX;

        if (budget > 0)
                repoll |= gve_clean_rx_done(rx, budget, feat);
        else
                repoll |= gve_rx_work_pending(rx);
        return repoll;
}