linux/drivers/net/ethernet/google/gve/gve_rx.c
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include <linux/etherdevice.h>

static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
        struct gve_notify_block *block =
                        &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];

        block->rx = NULL;
}

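/* Release all DMA memory and bookkeeping owned by one Rx ring: the
 * descriptor ring, the queue resources block, the data slot ring and the
 * per-slot page_info array. The ring is detached from its notify block
 * first and its queue page list (QPL) is handed back to the pool.
 */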
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *dev = &priv->pdev->dev;
        size_t bytes;
        u32 slots;

        gve_rx_remove_from_block(priv, idx);

        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
        rx->desc.desc_ring = NULL;

        dma_free_coherent(dev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;

        gve_unassign_qpl(priv, rx->data.qpl->id);
        rx->data.qpl = NULL;
        kvfree(rx->data.page_info);

        slots = rx->mask + 1;
        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(dev, bytes, rx->data.data_ring,
                          rx->data.data_bus);
        rx->data.data_ring = NULL;
        netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
                                struct gve_rx_data_slot *slot,
                                dma_addr_t addr, struct page *page)
{
        page_info->page = page;
        page_info->page_offset = 0;
        page_info->page_address = page_address(page);
        slot->qpl_offset = cpu_to_be64(addr);
}

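/* Populate every data slot with a buffer before the ring is handed to the
 * device. Pages come from the Rx queue page list (QPL) already registered
 * with the device, so each slot's qpl_offset is simply that page's byte
 * offset within the QPL. Returns the number of slots filled, or -ENOMEM.
 */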
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
        struct gve_priv *priv = rx->gve;
        u32 slots;
        int i;

        /* Allocate one page per Rx queue slot. Each page is split into two
         * packet buffers; when possible we "page flip" between the two.
         */
        slots = rx->mask + 1;

        rx->data.page_info = kvzalloc(slots *
                                      sizeof(*rx->data.page_info), GFP_KERNEL);
        if (!rx->data.page_info)
                return -ENOMEM;

        rx->data.qpl = gve_assign_rx_qpl(priv);

        for (i = 0; i < slots; i++) {
                struct page *page = rx->data.qpl->pages[i];
                dma_addr_t addr = i * PAGE_SIZE;

                gve_setup_rx_buffer(&rx->data.page_info[i],
                                    &rx->data.data_ring[i], addr, page);
        }

        return slots;
}

static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
{
        u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
        struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
        struct gve_rx_ring *rx = &priv->rx[queue_idx];

        block->rx = rx;
        rx->ntfy_id = ntfy_idx;
}

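/* Allocate one Rx ring: the data slot ring, one buffer per slot, the queue
 * resources block and the descriptor ring (which must be a whole number of
 * pages). On success the ring is attached to its notify block; on failure
 * everything allocated so far is unwound.
 */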
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *hdev = &priv->pdev->dev;
        u32 slots, npages;
        int filled_pages;
        size_t bytes;
        int err;

        netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
        /* Make sure everything is zeroed to start with */
        memset(rx, 0, sizeof(*rx));

        rx->gve = priv;
        rx->q_num = idx;

        slots = priv->rx_pages_per_qpl;
        rx->mask = slots - 1;

        /* alloc rx data ring */
        bytes = sizeof(*rx->data.data_ring) * slots;
        rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
                                                &rx->data.data_bus,
                                                GFP_KERNEL);
        if (!rx->data.data_ring)
                return -ENOMEM;
        filled_pages = gve_prefill_rx_pages(rx);
        if (filled_pages < 0) {
                err = -ENOMEM;
                goto abort_with_slots;
        }
        rx->fill_cnt = filled_pages;
        /* Ensure data ring slots (packet buffers) are visible. */
        dma_wmb();

        /* Alloc gve_queue_resources */
        rx->q_resources =
                dma_alloc_coherent(hdev,
                                   sizeof(*rx->q_resources),
                                   &rx->q_resources_bus,
                                   GFP_KERNEL);
        if (!rx->q_resources) {
                err = -ENOMEM;
                goto abort_filled;
        }
        netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
                  (unsigned long)rx->data.data_bus);

        /* alloc rx desc ring */
        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        npages = bytes / PAGE_SIZE;
        if (npages * PAGE_SIZE != bytes) {
                err = -EIO;
                goto abort_with_q_resources;
        }

        rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
                                                GFP_KERNEL);
        if (!rx->desc.desc_ring) {
                err = -ENOMEM;
                goto abort_with_q_resources;
        }
        rx->mask = slots - 1;
        rx->cnt = 0;
        rx->desc.seqno = 1;
        gve_rx_add_to_block(priv, idx);

        return 0;

abort_with_q_resources:
        dma_free_coherent(hdev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;
abort_filled:
        kvfree(rx->data.page_info);
abort_with_slots:
        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
        rx->data.data_ring = NULL;

        return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
        int err = 0;
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
                err = gve_rx_alloc_ring(priv, i);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to alloc rx ring=%d: err=%d\n",
                                  i, err);
                        break;
                }
        }
        /* Free any rings allocated so far if there was an error */
        if (err) {
                int j;

                for (j = 0; j < i; j++)
                        gve_rx_free_ring(priv, j);
        }
        return err;
}

void gve_rx_free_rings(struct gve_priv *priv)
{
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++)
                gve_rx_free_ring(priv, i);
}

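/* Tell the device how far the ring has been refilled by writing the current
 * fill count to this ring's doorbell. The doorbell index comes from the
 * queue resources block shared with the device.
 */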
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

        iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

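/* Map the descriptor's flag bits to the kernel's hash type: L4 for TCP/UDP,
 * L3 for bare IPv4/IPv6, L2 otherwise.
 */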
static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
        if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
                return PKT_HASH_TYPE_L4;
        if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
                return PKT_HASH_TYPE_L3;
        return PKT_HASH_TYPE_L2;
}

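/* Build an skb by copying the packet out of its registered buffer. The
 * buffer itself stays with the ring, so it can be posted back to the device
 * unchanged. Returns NULL if no skb could be allocated.
 */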
static struct sk_buff *gve_rx_copy(struct net_device *dev,
                                   struct napi_struct *napi,
                                   struct gve_rx_slot_page_info *page_info,
                                   u16 len)
{
        struct sk_buff *skb = napi_alloc_skb(napi, len);
        void *va = page_info->page_address + GVE_RX_PAD +
                   page_info->page_offset;

        if (unlikely(!skb))
                return NULL;

        __skb_put(skb, len);

        skb_copy_to_linear_data(skb, va, len);

        skb->protocol = eth_type_trans(skb, dev);
        return skb;
}

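/* Zero-copy path: attach the half page holding the packet to a GRO frag skb
 * instead of copying. The caller is responsible for taking an extra page
 * reference and flipping the ring slot to the other half of the page.
 */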
static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
                                        struct napi_struct *napi,
                                        struct gve_rx_slot_page_info *page_info,
                                        u16 len)
{
        struct sk_buff *skb = napi_get_frags(napi);

        if (unlikely(!skb))
                return NULL;

        skb_add_rx_frag(skb, 0, page_info->page,
                        page_info->page_offset +
                        GVE_RX_PAD, len, PAGE_SIZE / 2);

        return skb;
}

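/* "Flip" a slot to the unused half of its page: toggle both the driver's
 * page_offset and the qpl_offset the device will DMA into by half a page.
 */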
static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
                             struct gve_rx_data_slot *data_ring)
{
        u64 addr = be64_to_cpu(data_ring->qpl_offset);

        page_info->page_offset ^= PAGE_SIZE / 2;
        addr ^= PAGE_SIZE / 2;
        data_ring->qpl_offset = cpu_to_be64(addr);
}

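/* Process one completed Rx descriptor. On 4K-page systems small packets and
 * packets whose buffer cannot be recycled are copied; otherwise the half
 * page is handed to the stack as a frag and the slot is flipped to the other
 * half. Returns false only for a state that warrants a reset (an unexpected
 * page refcount); dropped packets and skb allocation failures return true.
 */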
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
                   netdev_features_t feat, u32 idx)
{
        struct gve_rx_slot_page_info *page_info;
        struct gve_priv *priv = rx->gve;
        struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
        struct net_device *dev = priv->dev;
        struct sk_buff *skb;
        int pagecount;
        u16 len;

        /* drop this packet */
        if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR))
                return true;

        len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
        page_info = &rx->data.page_info[idx];

        /* gvnic can only receive into registered segments. If the buffer
         * can't be recycled, our only choice is to copy the data out of
         * it so that we can return it to the device.
         */

        if (PAGE_SIZE == 4096) {
                if (len <= priv->rx_copybreak) {
                        /* Just copy small packets */
                        skb = gve_rx_copy(dev, napi, page_info, len);
                        goto have_skb;
                }
                if (unlikely(!gve_can_recycle_pages(dev))) {
                        skb = gve_rx_copy(dev, napi, page_info, len);
                        goto have_skb;
                }
                pagecount = page_count(page_info->page);
                if (pagecount == 1) {
                        /* No part of this page is used by any SKBs; we attach
                         * the page fragment to a new SKB and pass it up the
                         * stack.
                         */
                        skb = gve_rx_add_frags(dev, napi, page_info, len);
                        if (!skb)
                                return true;
                        /* Make sure the kernel stack can't release the page */
                        get_page(page_info->page);
                        /* "flip" to other packet buffer on this page */
                        gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
                } else if (pagecount >= 2) {
                        /* We have previously passed the other half of this
                         * page up the stack, but it has not yet been freed.
                         */
                        skb = gve_rx_copy(dev, napi, page_info, len);
                } else {
                        WARN(pagecount < 1, "Pagecount should never be < 1");
                        return false;
                }
        } else {
                skb = gve_rx_copy(dev, napi, page_info, len);
        }

have_skb:
        /* We didn't manage to allocate an skb, but we haven't had any
         * reset-worthy failures; just drop the packet.
         */
        if (!skb)
                return true;

        if (likely(feat & NETIF_F_RXCSUM)) {
                /* NIC passes up the partial sum */
                if (rx_desc->csum)
                        skb->ip_summed = CHECKSUM_COMPLETE;
                else
                        skb->ip_summed = CHECKSUM_NONE;
                skb->csum = csum_unfold(rx_desc->csum);
        }

        /* parse flags & pass relevant info up */
        if (likely(feat & NETIF_F_RXHASH) &&
            gve_needs_rss(rx_desc->flags_seq))
                skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
                             gve_rss_type(rx_desc->flags_seq));

        if (skb_is_nonlinear(skb))
                napi_gro_frags(napi);
        else
                napi_gro_receive(napi, skb);
        return true;
}

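/* Return true if the next descriptor the driver would process already
 * carries the sequence number we expect, i.e. the device has completed more
 * work than the last clean pass consumed.
 */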
static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
        struct gve_rx_desc *desc;
        __be16 flags_seq;
        u32 next_idx;

        next_idx = rx->cnt & rx->mask;
        desc = rx->desc.desc_ring + next_idx;

        flags_seq = desc->flags_seq;
        /* Make sure we have synchronized the seq no with the device */
        smp_rmb();

        return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

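/* Main Rx clean loop, called from NAPI poll. Walk descriptors in order for
 * as long as their sequence number matches the one the driver expects, hand
 * each completed packet to gve_rx(), then update stats, advance the fill
 * count and ring the doorbell so the consumed slots are posted back to the
 * device. Returns true if more completed work is already waiting.
 */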
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
                       netdev_features_t feat)
{
        struct gve_priv *priv = rx->gve;
        struct gve_rx_desc *desc;
        u32 cnt = rx->cnt;
        u32 idx = cnt & rx->mask;
        u32 work_done = 0;
        u64 bytes = 0;

        desc = rx->desc.desc_ring + idx;
        while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
               work_done < budget) {
                netif_info(priv, rx_status, priv->dev,
                           "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
                           rx->q_num, idx, desc, desc->flags_seq);
                netif_info(priv, rx_status, priv->dev,
                           "[%d] seqno=%d rx->desc.seqno=%d\n",
                           rx->q_num, GVE_SEQNO(desc->flags_seq),
                           rx->desc.seqno);
                bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
                if (!gve_rx(rx, desc, feat, idx))
                        gve_schedule_reset(priv);
                cnt++;
                idx = cnt & rx->mask;
                desc = rx->desc.desc_ring + idx;
                rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
                work_done++;
        }

        if (!work_done)
                return false;

        u64_stats_update_begin(&rx->statss);
        rx->rpackets += work_done;
        rx->rbytes += bytes;
        u64_stats_update_end(&rx->statss);
        rx->cnt = cnt;
        rx->fill_cnt += work_done;

        /* restock desc ring slots */
        dma_wmb();      /* Ensure descs are visible before ringing doorbell */
        gve_rx_write_doorbell(priv, rx);
        return gve_rx_work_pending(rx);
}

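/* NAPI entry point for Rx. A budget of 0 means "do all available work"; a
 * negative budget only checks whether work is pending. Returns true if the
 * queue should be polled again.
 */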
bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
        struct gve_rx_ring *rx = block->rx;
        netdev_features_t feat;
        bool repoll = false;

        feat = block->napi.dev->features;

        /* If budget is 0, do all the work */
        if (budget == 0)
                budget = INT_MAX;

        if (budget > 0)
                repoll |= gve_clean_rx_done(rx, budget, feat);
        else
                repoll |= gve_rx_work_pending(rx);
        return repoll;
}