// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <linux/skbuff.h>

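/* Ring the NIC doorbell for this queue by writing the next descriptor index
 * to be produced. Both the doorbell index and the MMIO write are big-endian.
 */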
static inline void gve_tx_put_doorbell(struct gve_priv *priv,
				       struct gve_queue_resources *q_resources,
				       u32 val)
{
	iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
}

/* gvnic can only transmit from a Registered Segment.
 * We copy skb payloads into the registered segment before writing Tx
 * descriptors and ringing the Tx doorbell.
 *
 * gve_tx_fifo_* manages the Registered Segment as a FIFO - clients must
 * free allocations in the order they were made.
 */
static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP,
			  PAGE_KERNEL);
	if (unlikely(!fifo->base)) {
		netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n",
			  fifo->qpl->id);
		return -ENOMEM;
	}

	fifo->size = fifo->qpl->num_entries * PAGE_SIZE;
	atomic_set(&fifo->available, fifo->size);
	fifo->head = 0;
	return 0;
}

static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	WARN(atomic_read(&fifo->available) != fifo->size,
	     "Releasing non-empty fifo");

	vunmap(fifo->base);
}

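/* Returns 0 if a fragment of @bytes fits before the end of the FIFO,
 * otherwise the number of bytes left to the end of the FIFO, which the
 * caller burns as padding so the fragment is not split across the wrap.
 */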
static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo,
					  size_t bytes)
{
	return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
}

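/* Returns true only if strictly more than @bytes of FIFO space remain
 * available, since a successful allocation also consumes alignment padding.
 */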
static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
{
	return atomic_read(&fifo->available) > bytes;
}

/* gve_tx_alloc_fifo - Allocate fragment(s) from Tx FIFO
 * @fifo: FIFO to allocate from
 * @bytes: Allocation size
 * @iov: Scatter-gather elements to fill with allocation fragment base/len
 *
 * Returns number of valid elements in iov[] or negative on error.
 *
 * Allocations from a given FIFO must be externally synchronized but concurrent
 * allocation and frees are allowed.
 */
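/* Worked example (illustrative numbers): in a 4096-byte FIFO with head ==
 * 4000, a 200-byte allocation wraps and returns two fragments: iov[0] =
 * {offset 4000, len 96} and iov[1] = {offset 0, len 104}. head then advances
 * to L1_CACHE_ALIGN(104) (128 with 64-byte cachelines), so 200 + 24 padding
 * = 224 bytes are subtracted from fifo->available.
 */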
static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
			     struct gve_tx_iovec iov[2])
{
	size_t overflow, padding;
	u32 aligned_head;
	int nfrags = 0;

	if (!bytes)
		return 0;

	/* This check happens before we know how much padding is needed to
	 * align to a cacheline boundary for the payload, but that is fine,
	 * because the FIFO head always starts aligned, and the FIFO's
	 * boundaries are aligned, so if there is space for the data, there is
	 * space for the padding to the next alignment.
	 */
	WARN(!gve_tx_fifo_can_alloc(fifo, bytes),
	     "Reached %s when there's not enough space in the fifo", __func__);

	nfrags++;

	iov[0].iov_offset = fifo->head;
	iov[0].iov_len = bytes;
	fifo->head += bytes;

	if (fifo->head > fifo->size) {
		/* If the allocation did not fit in the tail fragment of the
		 * FIFO, also use the head fragment.
		 */
		nfrags++;
		overflow = fifo->head - fifo->size;
		iov[0].iov_len -= overflow;
		iov[1].iov_offset = 0;	/* Start of fifo */
		iov[1].iov_len = overflow;

		fifo->head = overflow;
	}

	/* Re-align to a cacheline boundary */
	aligned_head = L1_CACHE_ALIGN(fifo->head);
	padding = aligned_head - fifo->head;
	iov[nfrags - 1].iov_padding = padding;
	atomic_sub(bytes + padding, &fifo->available);
	fifo->head = aligned_head;

	if (fifo->head == fifo->size)
		fifo->head = 0;

	return nfrags;
}

/* gve_tx_free_fifo - Return space to Tx FIFO
 * @fifo: FIFO to return fragments to
 * @bytes: Bytes to free
 */
static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
{
	atomic_add(bytes, &fifo->available);
}

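/* Detach this Tx ring from its notify block so completions on that
 * interrupt are no longer routed to it.
 */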
static void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
	struct gve_notify_block *block =
			&priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)];

	block->tx = NULL;
}

static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
			     u32 to_do, bool try_to_wake);

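/* Tear down one Tx queue: reap all outstanding completions, then free the
 * queue resources, FIFO mapping, descriptor ring, and per-slot metadata.
 */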
static void gve_tx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t bytes;
	u32 slots;

	gve_tx_remove_from_block(priv, idx);
	slots = tx->mask + 1;
	gve_clean_tx_done(priv, tx, tx->req, false);
	netdev_tx_reset_queue(tx->netdev_txq);

	dma_free_coherent(hdev, sizeof(*tx->q_resources),
			  tx->q_resources, tx->q_resources_bus);
	tx->q_resources = NULL;

	gve_tx_fifo_release(priv, &tx->tx_fifo);
	gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
	tx->tx_fifo.qpl = NULL;

	bytes = sizeof(*tx->desc) * slots;
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;

	vfree(tx->info);
	tx->info = NULL;

	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}

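/* Associate this Tx ring with its notify (interrupt) block. */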
static void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
{
	int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx);
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
	struct gve_tx_ring *tx = &priv->tx[queue_idx];

	block->tx = tx;
	tx->ntfy_id = ntfy_idx;
}

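/* Allocate all resources for one Tx queue: per-slot metadata, the DMA
 * descriptor ring, a QPL-backed copy FIFO, and the queue resources block.
 */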
static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots = priv->tx_desc_cnt;
	size_t bytes;

	/* Make the ring and buffers */
	memset(tx, 0, sizeof(*tx));
	tx->q_num = idx;

	tx->mask = slots - 1;

	/* alloc metadata */
	tx->info = vzalloc(sizeof(*tx->info) * slots);
	if (!tx->info)
		return -ENOMEM;

	/* alloc tx queue */
	bytes = sizeof(*tx->desc) * slots;
	tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
	if (!tx->desc)
		goto abort_with_info;

	tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);

	/* map Tx FIFO */
	if (gve_tx_fifo_init(priv, &tx->tx_fifo))
		goto abort_with_desc;

	tx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*tx->q_resources),
				   &tx->q_resources_bus,
				   GFP_KERNEL);
	if (!tx->q_resources)
		goto abort_with_fifo;

	netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
		  (unsigned long)tx->bus);
	tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
	gve_tx_add_to_block(priv, idx);

	return 0;

abort_with_fifo:
	gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_desc:
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;
abort_with_info:
	vfree(tx->info);
	tx->info = NULL;
	return -ENOMEM;
}

int gve_tx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_tx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc tx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_tx_free_ring(priv, j);
	}
	return err;
}

void gve_tx_free_rings(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_tx_free_ring(priv, i);
}

/* gve_tx_avail - Calculates the number of slots available in the ring
 * @tx: tx ring to check
 *
 * Returns the number of slots available
 *
 * The capacity of the queue is mask + 1. We don't need to reserve an entry.
 */
static inline u32 gve_tx_avail(struct gve_tx_ring *tx)
{
	return tx->mask + 1 - (tx->req - tx->done);
}

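/* Worst-case FIFO bytes this skb will consume: wrap padding for the header,
 * alignment padding after it, and the full packet length.
 */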
static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
					      struct sk_buff *skb)
{
	int pad_bytes, align_hdr_pad;
	int bytes;
	int hlen;

	hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
				 tcp_hdrlen(skb) : skb_headlen(skb);

	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
						   hlen);
	/* We need to take into account the header alignment padding. */
	align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen;
	bytes = align_hdr_pad + pad_bytes + skb->len;

	return bytes;
}

/* The most descriptors we could need is 3 - 1 for the headers, 1 for
 * the beginning of the payload at the end of the FIFO, and 1 if the
 * payload wraps to the beginning of the FIFO.
 */
#define MAX_TX_DESC_NEEDED	3

/* Check if sufficient resources (descriptor ring space, packet copy buffer
 * space) are available to transmit the given number of bytes from the skb.
 */
static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
{
	return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED &&
		gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required));
}

/* Stops the queue if the skb cannot be transmitted. */
static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
{
	int bytes_required;

	bytes_required = gve_skb_fifo_bytes_required(tx, skb);
	if (likely(gve_can_tx(tx, bytes_required)))
		return 0;

	/* No space, so stop the queue */
	tx->stop_queue++;
	netif_tx_stop_queue(tx->netdev_txq);
	smp_mb();	/* sync with restarting queue in gve_clean_tx_done() */

	/* Now check for resources again, in case gve_clean_tx_done() freed
	 * resources after we checked and we stopped the queue.
	 */
	if (likely(!gve_can_tx(tx, bytes_required)))
		return -EBUSY;

	netif_tx_start_queue(tx->netdev_txq);
	tx->wake_queue++;
	return 0;
}

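/* Fill the first (packet) descriptor: checksum/TSO flags, total length, and
 * the FIFO address and length of the header fragment.
 */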
static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
				 struct sk_buff *skb, bool is_gso,
				 int l4_hdr_offset, u32 desc_cnt,
				 u16 hlen, u64 addr)
{
	/* l4_hdr_offset and csum_offset are in units of 16-bit words */
	if (is_gso) {
		pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else {
		pkt_desc->pkt.type_flags = GVE_TXD_STD;
		pkt_desc->pkt.l4_csum_offset = 0;
		pkt_desc->pkt.l4_hdr_offset = 0;
	}
	pkt_desc->pkt.desc_cnt = desc_cnt;
	pkt_desc->pkt.len = cpu_to_be16(skb->len);
	pkt_desc->pkt.seg_len = cpu_to_be16(hlen);
	pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
}

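/* Fill a segment descriptor for one payload fragment; for TSO also record
 * the L3 header offset and the MSS.
 */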
static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
				 struct sk_buff *skb, bool is_gso,
				 u16 len, u64 addr)
{
	seg_desc->seg.type_flags = GVE_TXD_SEG;
	if (is_gso) {
		if (skb_is_gso_v6(skb))
			seg_desc->seg.type_flags |= GVE_TXSF_IPV6;
		seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1;
		seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
	}
	seg_desc->seg.seg_len = cpu_to_be16(len);
	seg_desc->seg.seg_addr = cpu_to_be64(addr);
}

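/* Copy the skb into the Tx FIFO (header fragment first, then payload) and
 * write one packet descriptor plus a segment descriptor per payload fragment.
 * Returns the number of descriptors posted.
 */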
static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb)
{
	int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
	union gve_tx_desc *pkt_desc, *seg_desc;
	struct gve_tx_buffer_state *info;
	bool is_gso = skb_is_gso(skb);
	u32 idx = tx->req & tx->mask;
	int payload_iov = 2;
	int copy_offset;
	u32 next_idx;
	int i;

	info = &tx->info[idx];
	pkt_desc = &tx->desc[idx];

	l4_hdr_offset = skb_checksum_start_offset(skb);
	/* If the skb is gso, then we want the tcp header in the first segment
	 * otherwise we want the linear portion of the skb (which will contain
	 * the checksum because skb->csum_start and skb->csum_offset are given
	 * relative to skb->head) in the first segment.
	 */
	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
			skb_headlen(skb);

	info->skb = skb;
	/* We don't want to split the header, so if necessary, pad to the end
	 * of the fifo and then put the header at the beginning of the fifo.
	 */
	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen);
	hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes,
				       &info->iov[0]);
	WARN(!hdr_nfrags, "hdr_nfrags should never be 0!");
	payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen,
					   &info->iov[payload_iov]);

	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
			     1 + payload_nfrags, hlen,
			     info->iov[hdr_nfrags - 1].iov_offset);

	skb_copy_bits(skb, 0,
		      tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
		      hlen);
	copy_offset = hlen;

	for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
		next_idx = (tx->req + 1 + i - payload_iov) & tx->mask;
		seg_desc = &tx->desc[next_idx];

		gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
				     info->iov[i].iov_len,
				     info->iov[i].iov_offset);

		skb_copy_bits(skb, copy_offset,
			      tx->tx_fifo.base + info->iov[i].iov_offset,
			      info->iov[i].iov_len);
		copy_offset += info->iov[i].iov_len;
	}

	return 1 + payload_nfrags;
}

netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx;
	int nsegs;

	WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues,
	     "skb queue index out of range");
	tx = &priv->tx[skb_get_queue_mapping(skb)];
	if (unlikely(gve_maybe_stop_tx(tx, skb))) {
		/* We need to ring the txq doorbell -- we have stopped the Tx
		 * queue for want of resources, but prior calls to gve_tx()
		 * may have added descriptors without ringing the doorbell.
		 */
		/* Ensure tx descs from a prior gve_tx are visible before
		 * ringing the doorbell.
		 */
		dma_wmb();
		gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
		return NETDEV_TX_BUSY;
	}
	nsegs = gve_tx_add_skb(tx, skb);

	netdev_tx_sent_queue(tx->netdev_txq, skb->len);
	skb_tx_timestamp(skb);

	/* give packets to NIC */
	tx->req += nsegs;

	if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
		return NETDEV_TX_OK;

	/* Ensure tx descs are visible before ringing doorbell */
	dma_wmb();
	gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
	return NETDEV_TX_OK;
}

#define GVE_TX_START_THRESH	PAGE_SIZE

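/* Reap up to @to_do completed descriptors: free the skbs, return their FIFO
 * space, update stats/BQL, and optionally restart a stopped queue.
 */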
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
			     u32 to_do, bool try_to_wake)
{
	struct gve_tx_buffer_state *info;
	u64 pkts = 0, bytes = 0;
	size_t space_freed = 0;
	struct sk_buff *skb;
	int i, j;
	u32 idx;

	for (j = 0; j < to_do; j++) {
		idx = tx->done & tx->mask;
		netif_info(priv, tx_done, priv->dev,
			   "[%d] %s: idx=%d (req=%u done=%u)\n",
			   tx->q_num, __func__, idx, tx->req, tx->done);
		info = &tx->info[idx];
		skb = info->skb;

		/* Mark as free */
		if (skb) {
			info->skb = NULL;
			bytes += skb->len;
			pkts++;
			dev_consume_skb_any(skb);
			/* FIFO free */
			for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
				space_freed += info->iov[i].iov_len +
					       info->iov[i].iov_padding;
				info->iov[i].iov_len = 0;
				info->iov[i].iov_padding = 0;
			}
		}
		tx->done++;
	}

	gve_tx_free_fifo(&tx->tx_fifo, space_freed);
	u64_stats_update_begin(&tx->statss);
	tx->bytes_done += bytes;
	tx->pkt_done += pkts;
	u64_stats_update_end(&tx->statss);
	netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes);

	/* start the queue if we've stopped it */
#ifndef CONFIG_BQL
	/* Without BQL, netdev_tx_completed_queue() does not provide the
	 * barrier that pairs with the smp_mb() in gve_maybe_stop_tx().
	 */
	smp_mb();
#endif
	if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) &&
	    likely(gve_can_tx(tx, GVE_TX_START_THRESH))) {
		tx->wake_queue++;
		netif_tx_wake_queue(tx->netdev_txq);
	}

	return pkts;
}

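/* Read the NIC's event counter for this queue, which reports (big-endian)
 * how many Tx descriptors the device has completed.
 */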
__be32 gve_tx_load_event_counter(struct gve_priv *priv,
				 struct gve_tx_ring *tx)
{
	u32 counter_index = be32_to_cpu(tx->q_resources->counter_index);

	return READ_ONCE(priv->counter_array[counter_index]);
}

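/* NAPI poll callback for Tx: clean completed descriptors up to @budget and
 * report whether more work remains.
 */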
bool gve_tx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_priv *priv = block->priv;
	struct gve_tx_ring *tx = block->tx;
	bool repoll = false;
	u32 nic_done;
	u32 to_do;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	/* Find out how much work there is to be done */
	tx->last_nic_done = gve_tx_load_event_counter(priv, tx);
	nic_done = be32_to_cpu(tx->last_nic_done);
	if (budget > 0) {
		/* Do as much work as we have that the budget will
		 * allow
		 */
		to_do = min_t(u32, (nic_done - tx->done), budget);
		gve_clean_tx_done(priv, tx, to_do, true);
	}
	/* If we still have work we want to repoll */
	repoll |= (nic_done != tx->done);
	return repoll;
}