linux/drivers/net/ethernet/sfc/tx.c
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/ipv6.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include <linux/cache.h>
#include "net_driver.h"
#include "efx.h"
#include "io.h"
#include "nic.h"
#include "tx.h"
#include "workarounds.h"
#include "ef10_regs.h"

#ifdef EFX_USE_PIO

#define EFX_PIOBUF_SIZE_DEF ALIGN(256, L1_CACHE_BYTES)
unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF;

#endif /* EFX_USE_PIO */

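/* Return a pointer into the per-queue copy buffer for the current insert
 * slot.  Each descriptor slot owns a 2^EFX_TX_CB_ORDER byte region (offset
 * by NET_IP_ALIGN) within a cb_page entry; pages are allocated lazily with
 * GFP_ATOMIC, so this can fail and return NULL on the transmit path.
 */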
static inline u8 *efx_tx_get_copy_buffer(struct efx_tx_queue *tx_queue,
                                         struct efx_tx_buffer *buffer)
{
        unsigned int index = efx_tx_queue_get_insert_index(tx_queue);
        struct efx_buffer *page_buf =
                &tx_queue->cb_page[index >> (PAGE_SHIFT - EFX_TX_CB_ORDER)];
        unsigned int offset =
                ((index << EFX_TX_CB_ORDER) + NET_IP_ALIGN) & (PAGE_SIZE - 1);

        if (unlikely(!page_buf->addr) &&
            efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
                                 GFP_ATOMIC))
                return NULL;
        buffer->dma_addr = page_buf->dma_addr + offset;
        buffer->unmap_len = 0;
        return (u8 *)page_buf->addr + offset;
}

u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
                                   struct efx_tx_buffer *buffer, size_t len)
{
        if (len > EFX_TX_CB_SIZE)
                return NULL;
        return efx_tx_get_copy_buffer(tx_queue, buffer);
}

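/* Release a completed or unwound descriptor: undo any DMA mapping it owns,
 * free the skb on the final buffer of a packet, and account completed
 * packets and bytes for the caller.
 */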
static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
                               struct efx_tx_buffer *buffer,
                               unsigned int *pkts_compl,
                               unsigned int *bytes_compl)
{
        if (buffer->unmap_len) {
                struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
                dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;
                if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
                        dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
                                         DMA_TO_DEVICE);
                else
                        dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
                                       DMA_TO_DEVICE);
                buffer->unmap_len = 0;
        }

        if (buffer->flags & EFX_TX_BUF_SKB) {
                (*pkts_compl)++;
                (*bytes_compl) += buffer->skb->len;
                dev_consume_skb_any((struct sk_buff *)buffer->skb);
                netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
                           "TX queue %d transmission id %x complete\n",
                           tx_queue->queue, tx_queue->read_count);
        }

        buffer->len = 0;
        buffer->flags = 0;
}

unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
{
        /* Header and payload descriptor for each output segment, plus
         * one for every input fragment boundary within a segment
         */
        unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;

        /* Possibly one more per segment for option descriptors */
        if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
                max_descs += EFX_TSO_MAX_SEGS;

        /* Possibly more for PCIe page boundaries within input fragments */
        if (PAGE_SIZE > EFX_PAGE_SIZE)
                max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
                                   DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));

        return max_descs;
}

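/* Stop the core TX queue if either hardware queue of the pair is close to
 * full.  The fill level is first checked against the cached old_read_count
 * so the common case avoids the likely cache miss on read_count; the
 * comment below describes the race with the completion path.
 */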
static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
{
        /* We need to consider both queues that the net core sees as one */
        struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1);
        struct efx_nic *efx = txq1->efx;
        unsigned int fill_level;

        fill_level = max(txq1->insert_count - txq1->old_read_count,
                         txq2->insert_count - txq2->old_read_count);
        if (likely(fill_level < efx->txq_stop_thresh))
                return;

        /* We used the stale old_read_count above, which gives us a
         * pessimistic estimate of the fill level (which may even
         * validly be >= efx->txq_entries).  Now try again using
         * read_count (more likely to be a cache miss).
         *
         * If we read read_count and then conditionally stop the
         * queue, it is possible for the completion path to race with
         * us and complete all outstanding descriptors in the middle,
         * after which there will be no more completions to wake it.
         * Therefore we stop the queue first, then read read_count
         * (with a memory barrier to ensure the ordering), then
         * restart the queue if the fill level turns out to be low
         * enough.
         */
        netif_tx_stop_queue(txq1->core_txq);
        smp_mb();
        txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
        txq2->old_read_count = ACCESS_ONCE(txq2->read_count);

        fill_level = max(txq1->insert_count - txq1->old_read_count,
                         txq2->insert_count - txq2->old_read_count);
        EFX_WARN_ON_ONCE_PARANOID(fill_level >= efx->txq_entries);
        if (likely(fill_level < efx->txq_stop_thresh)) {
                smp_mb();
                if (likely(!efx->loopback_selftest))
                        netif_tx_start_queue(txq1->core_txq);
        }
}

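/* Copy an entire short skb into the queue's copy buffer so that its payload
 * needs no DMA mapping of its own; the single resulting descriptor points
 * into the already-mapped cb_page area.
 */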
static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue,
                                struct sk_buff *skb)
{
        unsigned int copy_len = skb->len;
        struct efx_tx_buffer *buffer;
        u8 *copy_buffer;
        int rc;

        EFX_WARN_ON_ONCE_PARANOID(copy_len > EFX_TX_CB_SIZE);

        buffer = efx_tx_queue_get_insert_buffer(tx_queue);

        copy_buffer = efx_tx_get_copy_buffer(tx_queue, buffer);
        if (unlikely(!copy_buffer))
                return -ENOMEM;

        rc = skb_copy_bits(skb, 0, copy_buffer, copy_len);
        EFX_WARN_ON_PARANOID(rc);
        buffer->len = copy_len;

        buffer->skb = skb;
        buffer->flags = EFX_TX_BUF_SKB;

        ++tx_queue->insert_count;
        return rc;
}

#ifdef EFX_USE_PIO

struct efx_short_copy_buffer {
        int used;
        u8 buf[L1_CACHE_BYTES];
};

/* Copy to PIO, respecting that writes to PIO buffers must be dword aligned.
 * Advances piobuf pointer. Leaves additional data in the copy buffer.
 */
static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf,
                                    u8 *data, int len,
                                    struct efx_short_copy_buffer *copy_buf)
{
        int block_len = len & ~(sizeof(copy_buf->buf) - 1);

        __iowrite64_copy(*piobuf, data, block_len >> 3);
        *piobuf += block_len;
        len -= block_len;

        if (len) {
                data += block_len;
                BUG_ON(copy_buf->used);
                BUG_ON(len > sizeof(copy_buf->buf));
                memcpy(copy_buf->buf, data, len);
                copy_buf->used = len;
        }
}

/* Copy to PIO, respecting dword alignment, popping data from copy buffer first.
 * Advances piobuf pointer. Leaves additional data in the copy buffer.
 */
static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf,
                                       u8 *data, int len,
                                       struct efx_short_copy_buffer *copy_buf)
{
        if (copy_buf->used) {
                /* if the copy buffer is partially full, fill it up and write */
                int copy_to_buf =
                        min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len);

                memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf);
                copy_buf->used += copy_to_buf;

                /* if we didn't fill it up then we're done for now */
                if (copy_buf->used < sizeof(copy_buf->buf))
                        return;

                __iowrite64_copy(*piobuf, copy_buf->buf,
                                 sizeof(copy_buf->buf) >> 3);
                *piobuf += sizeof(copy_buf->buf);
                data += copy_to_buf;
                len -= copy_to_buf;
                copy_buf->used = 0;
        }

        efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf);
}

static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf,
                                  struct efx_short_copy_buffer *copy_buf)
{
        /* if there's anything in it, write the whole buffer, including junk */
        if (copy_buf->used)
                __iowrite64_copy(piobuf, copy_buf->buf,
                                 sizeof(copy_buf->buf) >> 3);
}

/* Traverse the skb structure and copy its fragments into the PIO buffer.
 * Advances piobuf pointer.
 */
static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
                                     u8 __iomem **piobuf,
                                     struct efx_short_copy_buffer *copy_buf)
{
        int i;

        efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb),
                                copy_buf);

        for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
                skb_frag_t *f = &skb_shinfo(skb)->frags[i];
                u8 *vaddr;

                vaddr = kmap_atomic(skb_frag_page(f));

                efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + f->page_offset,
                                           skb_frag_size(f), copy_buf);
                kunmap_atomic(vaddr);
        }

        EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->frag_list);
}

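/* Transmit a short packet entirely by programmed I/O: the payload is written
 * into the queue's PIO buffer on the NIC and a single option descriptor
 * referring to that buffer is queued, so no DMA mapping is needed on this
 * path.
 */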
static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue,
                               struct sk_buff *skb)
{
        struct efx_tx_buffer *buffer =
                efx_tx_queue_get_insert_buffer(tx_queue);
        u8 __iomem *piobuf = tx_queue->piobuf;

        /* Copy to PIO buffer. Ensure the writes are padded to the end
         * of a cache line, as this is required for write-combining to be
         * effective on at least x86.
         */

        if (skb_shinfo(skb)->nr_frags) {
                /* The size of the copy buffer will ensure all writes
                 * are the size of a cache line.
                 */
                struct efx_short_copy_buffer copy_buf;

                copy_buf.used = 0;

                efx_skb_copy_bits_to_pio(tx_queue->efx, skb,
                                         &piobuf, &copy_buf);
                efx_flush_copy_buffer(tx_queue->efx, piobuf, &copy_buf);
        } else {
                /* Pad the write to the size of a cache line.
                 * We can do this because we know the skb_shared_info struct is
                 * after the source, and the destination buffer is big enough.
                 */
                BUILD_BUG_ON(L1_CACHE_BYTES >
                             SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
                __iowrite64_copy(tx_queue->piobuf, skb->data,
                                 ALIGN(skb->len, L1_CACHE_BYTES) >> 3);
        }

        buffer->skb = skb;
        buffer->flags = EFX_TX_BUF_SKB | EFX_TX_BUF_OPTION;

        EFX_POPULATE_QWORD_5(buffer->option,
                             ESF_DZ_TX_DESC_IS_OPT, 1,
                             ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO,
                             ESF_DZ_TX_PIO_CONT, 0,
                             ESF_DZ_TX_PIO_BYTE_CNT, skb->len,
                             ESF_DZ_TX_PIO_BUF_ADDR,
                             tx_queue->piobuf_offset);
        ++tx_queue->insert_count;
        return 0;
}
#endif /* EFX_USE_PIO */

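/* Create descriptors for one DMA-contiguous chunk, splitting it wherever the
 * NIC's per-descriptor length limit (tx_limit_len) requires, and return the
 * last buffer written so the caller can attach unmap and skb state to it.
 */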
static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
                                              dma_addr_t dma_addr,
                                              size_t len)
{
        const struct efx_nic_type *nic_type = tx_queue->efx->type;
        struct efx_tx_buffer *buffer;
        unsigned int dma_len;

        /* Map the fragment taking account of NIC-dependent DMA limits. */
        do {
                buffer = efx_tx_queue_get_insert_buffer(tx_queue);
                dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);

                buffer->len = dma_len;
                buffer->dma_addr = dma_addr;
                buffer->flags = EFX_TX_BUF_CONT;
                len -= dma_len;
                dma_addr += dma_len;
                ++tx_queue->insert_count;
        } while (len);

        return buffer;
}

/* Map all data from an SKB for DMA and create descriptors on the queue.
 */
static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
                           unsigned int segment_count)
{
        struct efx_nic *efx = tx_queue->efx;
        struct device *dma_dev = &efx->pci_dev->dev;
        unsigned int frag_index, nr_frags;
        dma_addr_t dma_addr, unmap_addr;
        unsigned short dma_flags;
        size_t len, unmap_len;

        nr_frags = skb_shinfo(skb)->nr_frags;
        frag_index = 0;

        /* Map header data. */
        len = skb_headlen(skb);
        dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
        dma_flags = EFX_TX_BUF_MAP_SINGLE;
        unmap_len = len;
        unmap_addr = dma_addr;

        if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
                return -EIO;

        if (segment_count) {
                /* For TSO we need to put the header into a separate
                 * descriptor. Map this separately if necessary.
                 */
                size_t header_len = skb_transport_header(skb) - skb->data +
                                (tcp_hdr(skb)->doff << 2u);

                if (header_len != len) {
                        tx_queue->tso_long_headers++;
                        efx_tx_map_chunk(tx_queue, dma_addr, header_len);
                        len -= header_len;
                        dma_addr += header_len;
                }
        }

        /* Add descriptors for each fragment. */
        do {
                struct efx_tx_buffer *buffer;
                skb_frag_t *fragment;

                buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);

                /* The final descriptor for a fragment is responsible for
                 * unmapping the whole fragment.
                 */
                buffer->flags = EFX_TX_BUF_CONT | dma_flags;
                buffer->unmap_len = unmap_len;
                buffer->dma_offset = buffer->dma_addr - unmap_addr;

                if (frag_index >= nr_frags) {
                        /* Store SKB details with the final buffer for
                         * the completion.
                         */
                        buffer->skb = skb;
                        buffer->flags = EFX_TX_BUF_SKB | dma_flags;
                        return 0;
                }

                /* Move on to the next fragment. */
                fragment = &skb_shinfo(skb)->frags[frag_index++];
                len = skb_frag_size(fragment);
                dma_addr = skb_frag_dma_map(dma_dev, fragment,
                                0, len, DMA_TO_DEVICE);
                dma_flags = 0;
                unmap_len = len;
                unmap_addr = dma_addr;

                if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
                        return -EIO;
        } while (1);
}

/* Remove buffers put into a tx_queue.  None of the buffers may have an
 * skb attached.
 */
static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
{
        struct efx_tx_buffer *buffer;

        /* Work backwards until we hit the original insert pointer value */
        while (tx_queue->insert_count != tx_queue->write_count) {
                --tx_queue->insert_count;
                buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
                efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
        }
}

/*
 * Fall back to software TSO.
 *
 * This is used if we are unable to send a GSO packet through hardware TSO.
 * This should only ever happen due to per-queue restrictions; unsupported
 * packets should first be filtered by the feature flags.
 *
 * Returns 0 on success, error code otherwise.
 */
static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue,
                               struct sk_buff *skb)
{
        struct sk_buff *segments, *next;

        segments = skb_gso_segment(skb, 0);
        if (IS_ERR(segments))
                return PTR_ERR(segments);

        dev_kfree_skb_any(skb);
        skb = segments;

        while (skb) {
                next = skb->next;
                skb->next = NULL;

                if (next)
                        skb->xmit_more = true;
                efx_enqueue_skb(tx_queue, skb);
                skb = next;
        }

        return 0;
}

/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue.  The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped and
 * the queue's insert pointer will be restored to its original value.
 *
 * This function is split out from efx_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.
 *
 * Returns NETDEV_TX_OK.
 * You must hold netif_tx_lock() to call this function.
 */
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
        bool data_mapped = false;
        unsigned int segments;
        unsigned int skb_len;
        int rc;

        skb_len = skb->len;
        segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0;
        if (segments == 1)
                segments = 0; /* Don't use TSO for a single segment. */

        /* Handle TSO first - it's *possible* (although unlikely) that we might
         * be passed a packet to segment that's smaller than the copybreak/PIO
         * size limit.
         */
        if (segments) {
                EFX_WARN_ON_ONCE_PARANOID(!tx_queue->handle_tso);
                rc = tx_queue->handle_tso(tx_queue, skb, &data_mapped);
                if (rc == -EINVAL) {
                        rc = efx_tx_tso_fallback(tx_queue, skb);
                        tx_queue->tso_fallbacks++;
                        if (rc == 0)
                                return 0;
                }
                if (rc)
                        goto err;
#ifdef EFX_USE_PIO
        } else if (skb_len <= efx_piobuf_size && !skb->xmit_more &&
                   efx_nic_may_tx_pio(tx_queue)) {
                /* Use PIO for short packets with an empty queue. */
                if (efx_enqueue_skb_pio(tx_queue, skb))
                        goto err;
                tx_queue->pio_packets++;
                data_mapped = true;
#endif
        } else if (skb->data_len && skb_len <= EFX_TX_CB_SIZE) {
                /* Pad short packets or coalesce short fragmented packets. */
                if (efx_enqueue_skb_copy(tx_queue, skb))
                        goto err;
                tx_queue->cb_packets++;
                data_mapped = true;
        }

        /* Map for DMA and create descriptors if we haven't done so already. */
        if (!data_mapped && (efx_tx_map_data(tx_queue, skb, segments)))
                goto err;

        /* Update BQL */
        netdev_tx_sent_queue(tx_queue->core_txq, skb_len);

        /* Pass off to hardware */
        if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
                struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);

                /* There could be packets left on the partner queue if those
                 * SKBs had skb->xmit_more set. If we do not push those they
                 * could be left for a long time and cause a netdev watchdog.
                 */
                if (txq2->xmit_more_available)
                        efx_nic_push_buffers(txq2);

                efx_nic_push_buffers(tx_queue);
        } else {
                tx_queue->xmit_more_available = skb->xmit_more;
        }

        if (segments) {
                tx_queue->tso_bursts++;
                tx_queue->tso_packets += segments;
                tx_queue->tx_packets  += segments;
        } else {
                tx_queue->tx_packets++;
        }

        efx_tx_maybe_stop_queue(tx_queue);

        return NETDEV_TX_OK;


err:
        efx_enqueue_unwind(tx_queue);
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
}

/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
                                unsigned int index,
                                unsigned int *pkts_compl,
                                unsigned int *bytes_compl)
{
        struct efx_nic *efx = tx_queue->efx;
        unsigned int stop_index, read_ptr;

        stop_index = (index + 1) & tx_queue->ptr_mask;
        read_ptr = tx_queue->read_count & tx_queue->ptr_mask;

        while (read_ptr != stop_index) {
                struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];

                if (!(buffer->flags & EFX_TX_BUF_OPTION) &&
                    unlikely(buffer->len == 0)) {
                        netif_err(efx, tx_err, efx->net_dev,
                                  "TX queue %d spurious TX completion id %x\n",
                                  tx_queue->queue, read_ptr);
                        efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
                        return;
                }

                efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);

                ++tx_queue->read_count;
                read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
        }
}

/* Initiate a packet transmission.  We use one channel per CPU
 * (sharing when we have more CPUs than channels).  On Falcon, the TX
 * completion events will be directed back to the CPU that transmitted
 * the packet, which should be cache-efficient.
 *
 * Context: non-blocking.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 */
netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
                                struct net_device *net_dev)
{
        struct efx_nic *efx = netdev_priv(net_dev);
        struct efx_tx_queue *tx_queue;
        unsigned index, type;

        EFX_WARN_ON_PARANOID(!netif_device_present(net_dev));

        /* PTP "event" packet */
        if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
            unlikely(efx_ptp_is_ptp_tx(efx, skb))) {
                return efx_ptp_tx(efx, skb);
        }

        index = skb_get_queue_mapping(skb);
        type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
        if (index >= efx->n_tx_channels) {
                index -= efx->n_tx_channels;
                type |= EFX_TXQ_TYPE_HIGHPRI;
        }
        tx_queue = efx_get_tx_queue(efx, index, type);

        return efx_enqueue_skb(tx_queue, skb);
}

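/* Bind this hardware TX queue to its netdev TX queue.  The mapping must stay
 * the inverse of the queue lookup done in efx_hard_start_xmit(): channel
 * index first, with high-priority queues in a second block of n_tx_channels
 * entries.
 */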
void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
{
        struct efx_nic *efx = tx_queue->efx;

        /* Must be inverse of queue lookup in efx_hard_start_xmit() */
        tx_queue->core_txq =
                netdev_get_tx_queue(efx->net_dev,
                                    tx_queue->queue / EFX_TXQ_TYPES +
                                    ((tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI) ?
                                     efx->n_tx_channels : 0));
}

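/* Configure transmit traffic classes (mqprio offload): each class maps onto
 * a block of n_tx_channels queues, and the high-priority hardware queues are
 * probed and initialised on demand when the number of classes increases.
 */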
int efx_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
                 __be16 proto, struct tc_to_netdev *ntc)
{
        struct efx_nic *efx = netdev_priv(net_dev);
        struct efx_channel *channel;
        struct efx_tx_queue *tx_queue;
        unsigned tc, num_tc;
        int rc;

        if (ntc->type != TC_SETUP_MQPRIO)
                return -EINVAL;

        num_tc = ntc->mqprio->num_tc;

        if (num_tc > EFX_MAX_TX_TC)
                return -EINVAL;

        ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;

        if (num_tc == net_dev->num_tc)
                return 0;

        for (tc = 0; tc < num_tc; tc++) {
                net_dev->tc_to_txq[tc].offset = tc * efx->n_tx_channels;
                net_dev->tc_to_txq[tc].count = efx->n_tx_channels;
        }

        if (num_tc > net_dev->num_tc) {
                /* Initialise high-priority queues as necessary */
                efx_for_each_channel(channel, efx) {
                        efx_for_each_possible_channel_tx_queue(tx_queue,
                                                               channel) {
                                if (!(tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI))
                                        continue;
                                if (!tx_queue->buffer) {
                                        rc = efx_probe_tx_queue(tx_queue);
                                        if (rc)
                                                return rc;
                                }
                                if (!tx_queue->initialised)
                                        efx_init_tx_queue(tx_queue);
                                efx_init_tx_queue_core_txq(tx_queue);
                        }
                }
        } else {
                /* Reduce number of classes before number of queues */
                net_dev->num_tc = num_tc;
        }

        rc = netif_set_real_num_tx_queues(net_dev,
                                          max_t(int, num_tc, 1) *
                                          efx->n_tx_channels);
        if (rc)
                return rc;

        /* Do not destroy high-priority queues when they become
         * unused.  We would have to flush them first, and it is
         * fairly difficult to flush a subset of TX queues.  Leave
         * it to efx_fini_channels().
         */

        net_dev->num_tc = num_tc;
        return 0;
}

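/* Handle TX completions up to and including the given descriptor index:
 * release the completed buffers, accumulate completed packet and byte
 * counts, wake the core queue once the fill level has dropped below the
 * wake threshold, and record empty_read_count when the hardware queue
 * drains.
 */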
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
        unsigned fill_level;
        struct efx_nic *efx = tx_queue->efx;
        struct efx_tx_queue *txq2;
        unsigned int pkts_compl = 0, bytes_compl = 0;

        EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);

        efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
        tx_queue->pkts_compl += pkts_compl;
        tx_queue->bytes_compl += bytes_compl;

        if (pkts_compl > 1)
                ++tx_queue->merge_events;

        /* See if we need to restart the netif queue.  This memory
         * barrier ensures that we write read_count (inside
         * efx_dequeue_buffers()) before reading the queue status.
         */
        smp_mb();
        if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
            likely(efx->port_enabled) &&
            likely(netif_device_present(efx->net_dev))) {
                txq2 = efx_tx_queue_partner(tx_queue);
                fill_level = max(tx_queue->insert_count - tx_queue->read_count,
                                 txq2->insert_count - txq2->read_count);
                if (fill_level <= efx->txq_wake_thresh)
                        netif_tx_wake_queue(tx_queue->core_txq);
        }

        /* Check whether the hardware queue is now empty */
        if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
                tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
                if (tx_queue->read_count == tx_queue->old_write_count) {
                        smp_mb();
                        tx_queue->empty_read_count =
                                tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
                }
        }
}

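/* Number of pages needed to back the copy buffers for a ring: one
 * 2^EFX_TX_CB_ORDER byte region per descriptor slot, rounded up to whole
 * pages.
 */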
static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
{
        return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER);
}

int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
        struct efx_nic *efx = tx_queue->efx;
        unsigned int entries;
        int rc;

        /* Create the smallest power-of-two aligned ring */
        entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
        EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
        tx_queue->ptr_mask = entries - 1;

        netif_dbg(efx, probe, efx->net_dev,
                  "creating TX queue %d size %#x mask %#x\n",
                  tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);

        /* Allocate software ring */
        tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer),
                                   GFP_KERNEL);
        if (!tx_queue->buffer)
                return -ENOMEM;

        tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
                                    sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
        if (!tx_queue->cb_page) {
                rc = -ENOMEM;
                goto fail1;
        }

        /* Allocate hardware ring */
        rc = efx_nic_probe_tx(tx_queue);
        if (rc)
                goto fail2;

        return 0;

fail2:
        kfree(tx_queue->cb_page);
        tx_queue->cb_page = NULL;
fail1:
        kfree(tx_queue->buffer);
        tx_queue->buffer = NULL;
        return rc;
}

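/* Reset all queue counters and install the default TSO handler before the
 * NIC-specific initialisation; efx_nic_init_tx() may replace handle_tso
 * according to NIC/queue capabilities.
 */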
void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
        struct efx_nic *efx = tx_queue->efx;

        netif_dbg(efx, drv, efx->net_dev,
                  "initialising TX queue %d\n", tx_queue->queue);

        tx_queue->insert_count = 0;
        tx_queue->write_count = 0;
        tx_queue->packet_write_count = 0;
        tx_queue->old_write_count = 0;
        tx_queue->read_count = 0;
        tx_queue->old_read_count = 0;
        tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
        tx_queue->xmit_more_available = false;

        /* Set up default function pointers. These may get replaced by
         * efx_nic_init_tx() based on NIC/queue capabilities.
         */
        tx_queue->handle_tso = efx_enqueue_skb_tso;

        /* Set up TX descriptor ring */
        efx_nic_init_tx(tx_queue);

        tx_queue->initialised = true;
}

void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
        struct efx_tx_buffer *buffer;

        netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
                  "shutting down TX queue %d\n", tx_queue->queue);

        if (!tx_queue->buffer)
                return;

        /* Free any buffers left in the ring */
        while (tx_queue->read_count != tx_queue->write_count) {
                unsigned int pkts_compl = 0, bytes_compl = 0;
                buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
                efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);

                ++tx_queue->read_count;
        }
        tx_queue->xmit_more_available = false;
        netdev_tx_reset_queue(tx_queue->core_txq);
}

void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
        int i;

        if (!tx_queue->buffer)
                return;

        netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
                  "destroying TX queue %d\n", tx_queue->queue);
        efx_nic_remove_tx(tx_queue);

        if (tx_queue->cb_page) {
                for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
                        efx_nic_free_buffer(tx_queue->efx,
                                            &tx_queue->cb_page[i]);
                kfree(tx_queue->cb_page);
                tx_queue->cb_page = NULL;
        }

        kfree(tx_queue->buffer);
        tx_queue->buffer = NULL;
}