linux/drivers/net/ethernet/sfc/ef100_tx.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/****************************************************************************
   3 * Driver for Solarflare network controllers and boards
   4 * Copyright 2018 Solarflare Communications Inc.
   5 * Copyright 2019-2020 Xilinx Inc.
   6 *
   7 * This program is free software; you can redistribute it and/or modify it
   8 * under the terms of the GNU General Public License version 2 as published
   9 * by the Free Software Foundation, incorporated herein by reference.
  10 */
  11
  12#include <net/ip6_checksum.h>
  13
  14#include "net_driver.h"
  15#include "tx_common.h"
  16#include "nic_common.h"
  17#include "mcdi_functions.h"
  18#include "ef100_regs.h"
  19#include "io.h"
  20#include "ef100_tx.h"
  21#include "ef100_nic.h"
  22
  23int ef100_tx_probe(struct efx_tx_queue *tx_queue)
  24{
  25        /* Allocate an extra descriptor for the QMDA status completion entry */
  26        return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf,
  27                                    (tx_queue->ptr_mask + 2) *
  28                                    sizeof(efx_oword_t),
  29                                    GFP_KERNEL);
  30}
  31
  32void ef100_tx_init(struct efx_tx_queue *tx_queue)
  33{
  34        /* must be the inverse of lookup in efx_get_tx_channel */
  35        tx_queue->core_txq =
  36                netdev_get_tx_queue(tx_queue->efx->net_dev,
  37                                    tx_queue->channel->channel -
  38                                    tx_queue->efx->tx_channel_offset);
  39
  40        /* This value is purely documentational; as EF100 never passes through
  41         * the switch statement in tx.c:__efx_enqueue_skb(), that switch does
  42         * not handle case 3.  EF100's TSOv3 descriptors are generated by
  43         * ef100_make_tso_desc().
  44         * Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2.
  45         */
  46        tx_queue->tso_version = 3;
  47        if (efx_mcdi_tx_init(tx_queue))
  48                netdev_WARN(tx_queue->efx->net_dev,
  49                            "failed to initialise TXQ %d\n", tx_queue->queue);
  50}
  51
  52static bool ef100_tx_can_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
  53{
  54        struct efx_nic *efx = tx_queue->efx;
  55        struct ef100_nic_data *nic_data;
  56        struct efx_tx_buffer *buffer;
  57        size_t header_len;
  58        u32 mss;
  59
  60        nic_data = efx->nic_data;
  61
  62        if (!skb_is_gso_tcp(skb))
  63                return false;
  64        if (!(efx->net_dev->features & NETIF_F_TSO))
  65                return false;
  66
  67        mss = skb_shinfo(skb)->gso_size;
  68        if (unlikely(mss < 4)) {
  69                WARN_ONCE(1, "MSS of %u is too small for TSO\n", mss);
  70                return false;
  71        }
  72
  73        header_len = efx_tx_tso_header_length(skb);
  74        if (header_len > nic_data->tso_max_hdr_len)
  75                return false;
  76
  77        if (skb_shinfo(skb)->gso_segs > nic_data->tso_max_payload_num_segs) {
  78                /* net_dev->gso_max_segs should've caught this */
  79                WARN_ON_ONCE(1);
  80                return false;
  81        }
  82
  83        if (skb->data_len / mss > nic_data->tso_max_frames)
  84                return false;
  85
  86        /* net_dev->gso_max_size should've caught this */
  87        if (WARN_ON_ONCE(skb->data_len > nic_data->tso_max_payload_len))
  88                return false;
  89
  90        /* Reserve an empty buffer for the TSO V3 descriptor.
  91         * Convey the length of the header since we already know it.
  92         */
  93        buffer = efx_tx_queue_get_insert_buffer(tx_queue);
  94        buffer->flags = EFX_TX_BUF_TSO_V3 | EFX_TX_BUF_CONT;
  95        buffer->len = header_len;
  96        buffer->unmap_len = 0;
  97        buffer->skb = skb;
  98        ++tx_queue->insert_count;
  99        return true;
 100}
 101
 102static efx_oword_t *ef100_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index)
 103{
 104        if (likely(tx_queue->txd.buf.addr))
 105                return ((efx_oword_t *)tx_queue->txd.buf.addr) + index;
 106        else
 107                return NULL;
 108}
 109
 110static void ef100_notify_tx_desc(struct efx_tx_queue *tx_queue)
 111{
 112        unsigned int write_ptr;
 113        efx_dword_t reg;
 114
 115        tx_queue->xmit_pending = false;
 116
 117        if (unlikely(tx_queue->notify_count == tx_queue->write_count))
 118                return;
 119
 120        write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
 121        /* The write pointer goes into the high word */
 122        EFX_POPULATE_DWORD_1(reg, ERF_GZ_TX_RING_PIDX, write_ptr);
 123        efx_writed_page(tx_queue->efx, &reg,
 124                        ER_GZ_TX_RING_DOORBELL, tx_queue->queue);
 125        tx_queue->notify_count = tx_queue->write_count;
 126}
 127
 128static void ef100_tx_push_buffers(struct efx_tx_queue *tx_queue)
 129{
 130        ef100_notify_tx_desc(tx_queue);
 131        ++tx_queue->pushes;
 132}
 133
 134static void ef100_set_tx_csum_partial(const struct sk_buff *skb,
 135                                      struct efx_tx_buffer *buffer, efx_oword_t *txd)
 136{
 137        efx_oword_t csum;
 138        int csum_start;
 139
 140        if (!skb || skb->ip_summed != CHECKSUM_PARTIAL)
 141                return;
 142
 143        /* skb->csum_start has the offset from head, but we need the offset
 144         * from data.
 145         */
 146        csum_start = skb_checksum_start_offset(skb);
 147        EFX_POPULATE_OWORD_3(csum,
 148                             ESF_GZ_TX_SEND_CSO_PARTIAL_EN, 1,
 149                             ESF_GZ_TX_SEND_CSO_PARTIAL_START_W,
 150                             csum_start >> 1,
 151                             ESF_GZ_TX_SEND_CSO_PARTIAL_CSUM_W,
 152                             skb->csum_offset >> 1);
 153        EFX_OR_OWORD(*txd, *txd, csum);
 154}
 155
 156static void ef100_set_tx_hw_vlan(const struct sk_buff *skb, efx_oword_t *txd)
 157{
 158        u16 vlan_tci = skb_vlan_tag_get(skb);
 159        efx_oword_t vlan;
 160
 161        EFX_POPULATE_OWORD_2(vlan,
 162                             ESF_GZ_TX_SEND_VLAN_INSERT_EN, 1,
 163                             ESF_GZ_TX_SEND_VLAN_INSERT_TCI, vlan_tci);
 164        EFX_OR_OWORD(*txd, *txd, vlan);
 165}
 166
 167static void ef100_make_send_desc(struct efx_nic *efx,
 168                                 const struct sk_buff *skb,
 169                                 struct efx_tx_buffer *buffer, efx_oword_t *txd,
 170                                 unsigned int segment_count)
 171{
 172        /* TX send descriptor */
 173        EFX_POPULATE_OWORD_3(*txd,
 174                             ESF_GZ_TX_SEND_NUM_SEGS, segment_count,
 175                             ESF_GZ_TX_SEND_LEN, buffer->len,
 176                             ESF_GZ_TX_SEND_ADDR, buffer->dma_addr);
 177
 178        if (likely(efx->net_dev->features & NETIF_F_HW_CSUM))
 179                ef100_set_tx_csum_partial(skb, buffer, txd);
 180        if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX &&
 181            skb && skb_vlan_tag_present(skb))
 182                ef100_set_tx_hw_vlan(skb, txd);
 183}
 184
 185static void ef100_make_tso_desc(struct efx_nic *efx,
 186                                const struct sk_buff *skb,
 187                                struct efx_tx_buffer *buffer, efx_oword_t *txd,
 188                                unsigned int segment_count)
 189{
 190        bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL;
 191        unsigned int len, ip_offset, tcp_offset, payload_segs;
 192        u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16;
 193        unsigned int outer_ip_offset, outer_l4_offset;
 194        u16 vlan_tci = skb_vlan_tag_get(skb);
 195        u32 mss = skb_shinfo(skb)->gso_size;
 196        bool encap = skb->encapsulation;
 197        bool udp_encap = false;
 198        u16 vlan_enable = 0;
 199        struct tcphdr *tcp;
 200        bool outer_csum;
 201        u32 paylen;
 202
 203        if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID)
 204                mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP;
 205        if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX)
 206                vlan_enable = skb_vlan_tag_present(skb);
 207
 208        len = skb->len - buffer->len;
 209        /* We use 1 for the TSO descriptor and 1 for the header */
 210        payload_segs = segment_count - 2;
 211        if (encap) {
 212                outer_ip_offset = skb_network_offset(skb);
 213                outer_l4_offset = skb_transport_offset(skb);
 214                ip_offset = skb_inner_network_offset(skb);
 215                tcp_offset = skb_inner_transport_offset(skb);
 216                if (skb_shinfo(skb)->gso_type &
 217                    (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))
 218                        udp_encap = true;
 219        } else {
 220                ip_offset =  skb_network_offset(skb);
 221                tcp_offset = skb_transport_offset(skb);
 222                outer_ip_offset = outer_l4_offset = 0;
 223        }
 224        outer_csum = skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM;
 225
 226        /* subtract TCP payload length from inner checksum */
 227        tcp = (void *)skb->data + tcp_offset;
 228        paylen = skb->len - tcp_offset;
 229        csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen));
 230
 231        EFX_POPULATE_OWORD_19(*txd,
 232                              ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO,
 233                              ESF_GZ_TX_TSO_MSS, mss,
 234                              ESF_GZ_TX_TSO_HDR_NUM_SEGS, 1,
 235                              ESF_GZ_TX_TSO_PAYLOAD_NUM_SEGS, payload_segs,
 236                              ESF_GZ_TX_TSO_HDR_LEN_W, buffer->len >> 1,
 237                              ESF_GZ_TX_TSO_PAYLOAD_LEN, len,
 238                              ESF_GZ_TX_TSO_CSO_OUTER_L4, outer_csum,
 239                              ESF_GZ_TX_TSO_CSO_INNER_L4, 1,
 240                              ESF_GZ_TX_TSO_INNER_L3_OFF_W, ip_offset >> 1,
 241                              ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcp_offset >> 1,
 242                              ESF_GZ_TX_TSO_ED_INNER_IP4_ID, mangleid,
 243                              ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1,
 244                              ESF_GZ_TX_TSO_OUTER_L3_OFF_W, outer_ip_offset >> 1,
 245                              ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1,
 246                              ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial,
 247                              ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial,
 248                              ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? mangleid :
 249                                                                     ESE_GZ_TX_DESC_IP4_ID_NO_OP,
 250                              ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable,
 251                              ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci
 252                );
 253}
 254
 255static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
 256                                      const struct sk_buff *skb,
 257                                      unsigned int segment_count)
 258{
 259        unsigned int old_write_count = tx_queue->write_count;
 260        unsigned int new_write_count = old_write_count;
 261        struct efx_tx_buffer *buffer;
 262        unsigned int next_desc_type;
 263        unsigned int write_ptr;
 264        efx_oword_t *txd;
 265        unsigned int nr_descs = tx_queue->insert_count - old_write_count;
 266
 267        if (unlikely(nr_descs == 0))
 268                return;
 269
 270        if (segment_count)
 271                next_desc_type = ESE_GZ_TX_DESC_TYPE_TSO;
 272        else
 273                next_desc_type = ESE_GZ_TX_DESC_TYPE_SEND;
 274
 275        /* if it's a raw write (such as XDP) then always SEND single frames */
 276        if (!skb)
 277                nr_descs = 1;
 278
 279        do {
 280                write_ptr = new_write_count & tx_queue->ptr_mask;
 281                buffer = &tx_queue->buffer[write_ptr];
 282                txd = ef100_tx_desc(tx_queue, write_ptr);
 283                ++new_write_count;
 284
 285                /* Create TX descriptor ring entry */
 286                tx_queue->packet_write_count = new_write_count;
 287
 288                switch (next_desc_type) {
 289                case ESE_GZ_TX_DESC_TYPE_SEND:
 290                        ef100_make_send_desc(tx_queue->efx, skb,
 291                                             buffer, txd, nr_descs);
 292                        break;
 293                case ESE_GZ_TX_DESC_TYPE_TSO:
 294                        /* TX TSO descriptor */
 295                        WARN_ON_ONCE(!(buffer->flags & EFX_TX_BUF_TSO_V3));
 296                        ef100_make_tso_desc(tx_queue->efx, skb,
 297                                            buffer, txd, nr_descs);
 298                        break;
 299                default:
 300                        /* TX segment descriptor */
 301                        EFX_POPULATE_OWORD_3(*txd,
 302                                             ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_SEG,
 303                                             ESF_GZ_TX_SEG_LEN, buffer->len,
 304                                             ESF_GZ_TX_SEG_ADDR, buffer->dma_addr);
 305                }
 306                /* if it's a raw write (such as XDP) then always SEND */
 307                next_desc_type = skb ? ESE_GZ_TX_DESC_TYPE_SEG :
 308                                       ESE_GZ_TX_DESC_TYPE_SEND;
 309
 310        } while (new_write_count != tx_queue->insert_count);
 311
 312        wmb(); /* Ensure descriptors are written before they are fetched */
 313
 314        tx_queue->write_count = new_write_count;
 315
 316        /* The write_count above must be updated before reading
 317         * channel->holdoff_doorbell to avoid a race with the
 318         * completion path, so ensure these operations are not
 319         * re-ordered.  This also flushes the update of write_count
 320         * back into the cache.
 321         */
 322        smp_mb();
 323}
 324
 325void ef100_tx_write(struct efx_tx_queue *tx_queue)
 326{
 327        ef100_tx_make_descriptors(tx_queue, NULL, 0);
 328        ef100_tx_push_buffers(tx_queue);
 329}
 330
 331void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event)
 332{
 333        unsigned int tx_done =
 334                EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_NUM_DESC);
 335        unsigned int qlabel =
 336                EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_Q_LABEL);
 337        struct efx_tx_queue *tx_queue =
 338                efx_channel_get_tx_queue(channel, qlabel);
 339        unsigned int tx_index = (tx_queue->read_count + tx_done - 1) &
 340                                tx_queue->ptr_mask;
 341
 342        efx_xmit_done(tx_queue, tx_index);
 343}
 344
 345/* Add a socket buffer to a TX queue
 346 *
 347 * You must hold netif_tx_lock() to call this function.
 348 *
 349 * Returns 0 on success, error code otherwise. In case of an error this
 350 * function will free the SKB.
 351 */
 352int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 353{
 354        unsigned int old_insert_count = tx_queue->insert_count;
 355        struct efx_nic *efx = tx_queue->efx;
 356        bool xmit_more = netdev_xmit_more();
 357        unsigned int fill_level;
 358        unsigned int segments;
 359        int rc;
 360
 361        if (!tx_queue->buffer || !tx_queue->ptr_mask) {
 362                netif_stop_queue(efx->net_dev);
 363                dev_kfree_skb_any(skb);
 364                return -ENODEV;
 365        }
 366
 367        segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0;
 368        if (segments == 1)
 369                segments = 0;   /* Don't use TSO/GSO for a single segment. */
 370        if (segments && !ef100_tx_can_tso(tx_queue, skb)) {
 371                rc = efx_tx_tso_fallback(tx_queue, skb);
 372                tx_queue->tso_fallbacks++;
 373                if (rc)
 374                        goto err;
 375                else
 376                        return 0;
 377        }
 378
 379        /* Map for DMA and create descriptors */
 380        rc = efx_tx_map_data(tx_queue, skb, segments);
 381        if (rc)
 382                goto err;
 383        ef100_tx_make_descriptors(tx_queue, skb, segments);
 384
 385        fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
 386        if (fill_level > efx->txq_stop_thresh) {
 387                struct efx_tx_queue *txq2;
 388
 389                netif_tx_stop_queue(tx_queue->core_txq);
 390                /* Re-read after a memory barrier in case we've raced with
 391                 * the completion path. Otherwise there's a danger we'll never
 392                 * restart the queue if all completions have just happened.
 393                 */
 394                smp_mb();
 395                efx_for_each_channel_tx_queue(txq2, tx_queue->channel)
 396                        txq2->old_read_count = READ_ONCE(txq2->read_count);
 397                fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
 398                if (fill_level < efx->txq_stop_thresh)
 399                        netif_tx_start_queue(tx_queue->core_txq);
 400        }
 401
 402        tx_queue->xmit_pending = true;
 403
 404        /* If xmit_more then we don't need to push the doorbell, unless there
 405         * are 256 descriptors already queued in which case we have to push to
 406         * ensure we never push more than 256 at once.
 407         */
 408        if (__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
 409            tx_queue->write_count - tx_queue->notify_count > 255)
 410                ef100_tx_push_buffers(tx_queue);
 411
 412        if (segments) {
 413                tx_queue->tso_bursts++;
 414                tx_queue->tso_packets += segments;
 415                tx_queue->tx_packets  += segments;
 416        } else {
 417                tx_queue->tx_packets++;
 418        }
 419        return 0;
 420
 421err:
 422        efx_enqueue_unwind(tx_queue, old_insert_count);
 423        if (!IS_ERR_OR_NULL(skb))
 424                dev_kfree_skb_any(skb);
 425
 426        /* If we're not expecting another transmit and we had something to push
 427         * on this queue then we need to push here to get the previous packets
 428         * out.  We only enter this branch from before the xmit_more handling
 429         * above, so xmit_pending still refers to the old state.
 430         */
 431        if (tx_queue->xmit_pending && !xmit_more)
 432                ef100_tx_push_buffers(tx_queue);
 433        return rc;
 434}
 435