linux/drivers/net/ethernet/intel/i40e/i40e_txrx.c
   1/*******************************************************************************
   2 *
   3 * Intel Ethernet Controller XL710 Family Linux Driver
   4 * Copyright(c) 2013 - 2016 Intel Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms and conditions of the GNU General Public License,
   8 * version 2, as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 *
  15 * You should have received a copy of the GNU General Public License along
  16 * with this program.  If not, see <http://www.gnu.org/licenses/>.
  17 *
  18 * The full GNU General Public License is included in this distribution in
  19 * the file called "COPYING".
  20 *
  21 * Contact Information:
  22 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  23 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
  24 *
  25 ******************************************************************************/
  26
  27#include <linux/prefetch.h>
  28#include <net/busy_poll.h>
  29#include "i40e.h"
  30#include "i40e_prototype.h"
  31
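/**
 * build_ctob - build the cmd/type/offset/size/tag quadword of a Tx descriptor
 * @td_cmd: Tx descriptor command bits
 * @td_offset: Tx descriptor header offset values
 * @size: size of the data buffer in bytes
 * @td_tag: L2TAG1 (VLAN) tag to insert
 *
 * Packs the fields into the little-endian cmd_type_offset_bsz word of a
 * data descriptor.
 **/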
  32static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
  33                                u32 td_tag)
  34{
  35        return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
  36                           ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
  37                           ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
  38                           ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
  39                           ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
  40}
  41
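/* EOP (end of packet) plus RS (report status) - set on the last descriptor of a frame */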
  42#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
  43/**
  44 * i40e_fdir - Generate a Flow Director descriptor based on fdata
  45 * @tx_ring: Tx ring to send buffer on
  46 * @fdata: Flow director filter data
  47 * @add: Indicate if we are adding a rule or deleting one
  48 *
  49 **/
  50static void i40e_fdir(struct i40e_ring *tx_ring,
  51                      struct i40e_fdir_filter *fdata, bool add)
  52{
  53        struct i40e_filter_program_desc *fdir_desc;
  54        struct i40e_pf *pf = tx_ring->vsi->back;
  55        u32 flex_ptype, dtype_cmd;
  56        u16 i;
  57
  58        /* grab the next descriptor */
  59        i = tx_ring->next_to_use;
  60        fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
  61
  62        i++;
  63        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
  64
  65        flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK &
  66                     (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT);
  67
  68        flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK &
  69                      (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT);
  70
  71        flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
  72                      (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
  73
  74        /* Use LAN VSI Id if not programmed by user */
  75        flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK &
  76                      ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) <<
  77                       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);
  78
  79        dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
  80
  81        dtype_cmd |= add ?
  82                     I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
  83                     I40E_TXD_FLTR_QW1_PCMD_SHIFT :
  84                     I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
  85                     I40E_TXD_FLTR_QW1_PCMD_SHIFT;
  86
  87        dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK &
  88                     (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT);
  89
  90        dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK &
  91                     (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT);
  92
  93        if (fdata->cnt_index) {
  94                dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
  95                dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK &
  96                             ((u32)fdata->cnt_index <<
  97                              I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT);
  98        }
  99
 100        fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
 101        fdir_desc->rsvd = cpu_to_le32(0);
 102        fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
 103        fdir_desc->fd_id = cpu_to_le32(fdata->fd_id);
 104}
 105
 106#define I40E_FD_CLEAN_DELAY 10
 107/**
 108 * i40e_program_fdir_filter - Program a Flow Director filter
  109 * @fdir_data: Flow Director filter parameters for the rule
 110 * @raw_packet: the pre-allocated packet buffer for FDir
 111 * @pf: The PF pointer
 112 * @add: True for add/update, False for remove
 113 **/
 114static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
 115                                    u8 *raw_packet, struct i40e_pf *pf,
 116                                    bool add)
 117{
 118        struct i40e_tx_buffer *tx_buf, *first;
 119        struct i40e_tx_desc *tx_desc;
 120        struct i40e_ring *tx_ring;
 121        struct i40e_vsi *vsi;
 122        struct device *dev;
 123        dma_addr_t dma;
 124        u32 td_cmd = 0;
 125        u16 delay = 0;
 126        u16 i;
 127
 128        /* find existing FDIR VSI */
 129        vsi = NULL;
 130        for (i = 0; i < pf->num_alloc_vsi; i++)
 131                if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
 132                        vsi = pf->vsi[i];
 133        if (!vsi)
 134                return -ENOENT;
 135
 136        tx_ring = vsi->tx_rings[0];
 137        dev = tx_ring->dev;
 138
 139        /* we need two descriptors to add/del a filter and we can wait */
 140        do {
 141                if (I40E_DESC_UNUSED(tx_ring) > 1)
 142                        break;
 143                msleep_interruptible(1);
 144                delay++;
 145        } while (delay < I40E_FD_CLEAN_DELAY);
 146
 147        if (!(I40E_DESC_UNUSED(tx_ring) > 1))
 148                return -EAGAIN;
 149
 150        dma = dma_map_single(dev, raw_packet,
 151                             I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
 152        if (dma_mapping_error(dev, dma))
 153                goto dma_fail;
 154
 155        /* grab the next descriptor */
 156        i = tx_ring->next_to_use;
 157        first = &tx_ring->tx_bi[i];
 158        i40e_fdir(tx_ring, fdir_data, add);
 159
 160        /* Now program a dummy descriptor */
 161        i = tx_ring->next_to_use;
 162        tx_desc = I40E_TX_DESC(tx_ring, i);
 163        tx_buf = &tx_ring->tx_bi[i];
 164
 165        tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
 166
 167        memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
 168
 169        /* record length, and DMA address */
 170        dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
 171        dma_unmap_addr_set(tx_buf, dma, dma);
 172
 173        tx_desc->buffer_addr = cpu_to_le64(dma);
 174        td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
 175
 176        tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
 177        tx_buf->raw_buf = (void *)raw_packet;
 178
 179        tx_desc->cmd_type_offset_bsz =
 180                build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
 181
 182        /* Force memory writes to complete before letting h/w
 183         * know there are new descriptors to fetch.
 184         */
 185        wmb();
 186
 187        /* Mark the data descriptor to be watched */
 188        first->next_to_watch = tx_desc;
 189
 190        writel(tx_ring->next_to_use, tx_ring->tail);
 191        return 0;
 192
 193dma_fail:
 194        return -1;
 195}
 196
 197#define IP_HEADER_OFFSET 14
 198#define I40E_UDPIP_DUMMY_PACKET_LEN 42
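/* 42 bytes = 14-byte Ethernet + 20-byte IPv4 + 8-byte UDP header */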
 199/**
 200 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
 201 * @vsi: pointer to the targeted VSI
 202 * @fd_data: the flow director data required for the FDir descriptor
 203 * @add: true adds a filter, false removes it
 204 *
 205 * Returns 0 if the filters were successfully added or removed
 206 **/
 207static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
 208                                   struct i40e_fdir_filter *fd_data,
 209                                   bool add)
 210{
 211        struct i40e_pf *pf = vsi->back;
 212        struct udphdr *udp;
 213        struct iphdr *ip;
 214        bool err = false;
 215        u8 *raw_packet;
 216        int ret;
 217        static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
 218                0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
 219                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 220
 221        raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
 222        if (!raw_packet)
 223                return -ENOMEM;
 224        memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
 225
 226        ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
 227        udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
 228              + sizeof(struct iphdr));
 229
 230        ip->daddr = fd_data->dst_ip[0];
 231        udp->dest = fd_data->dst_port;
 232        ip->saddr = fd_data->src_ip[0];
 233        udp->source = fd_data->src_port;
 234
 235        fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
 236        ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
 237        if (ret) {
 238                dev_info(&pf->pdev->dev,
 239                         "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
 240                         fd_data->pctype, fd_data->fd_id, ret);
 241                err = true;
 242        } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
 243                if (add)
 244                        dev_info(&pf->pdev->dev,
 245                                 "Filter OK for PCTYPE %d loc = %d\n",
 246                                 fd_data->pctype, fd_data->fd_id);
 247                else
 248                        dev_info(&pf->pdev->dev,
 249                                 "Filter deleted for PCTYPE %d loc = %d\n",
 250                                 fd_data->pctype, fd_data->fd_id);
 251        }
 252        if (err)
 253                kfree(raw_packet);
 254
 255        return err ? -EOPNOTSUPP : 0;
 256}
 257
 258#define I40E_TCPIP_DUMMY_PACKET_LEN 54
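/* 54 bytes = 14-byte Ethernet + 20-byte IPv4 + 20-byte TCP header */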
 259/**
 260 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
 261 * @vsi: pointer to the targeted VSI
 262 * @fd_data: the flow director data required for the FDir descriptor
 263 * @add: true adds a filter, false removes it
 264 *
 265 * Returns 0 if the filters were successfully added or removed
 266 **/
 267static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
 268                                   struct i40e_fdir_filter *fd_data,
 269                                   bool add)
 270{
 271        struct i40e_pf *pf = vsi->back;
 272        struct tcphdr *tcp;
 273        struct iphdr *ip;
 274        bool err = false;
 275        u8 *raw_packet;
 276        int ret;
 277        /* Dummy packet */
 278        static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
 279                0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
 280                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
 281                0x0, 0x72, 0, 0, 0, 0};
 282
 283        raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
 284        if (!raw_packet)
 285                return -ENOMEM;
 286        memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
 287
 288        ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
 289        tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
 290              + sizeof(struct iphdr));
 291
 292        ip->daddr = fd_data->dst_ip[0];
 293        tcp->dest = fd_data->dst_port;
 294        ip->saddr = fd_data->src_ip[0];
 295        tcp->source = fd_data->src_port;
 296
 297        if (add) {
 298                pf->fd_tcp_rule++;
 299                if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
 300                    I40E_DEBUG_FD & pf->hw.debug_mask)
 301                        dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
 302                pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
 303        } else {
 304                pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
 305                                  (pf->fd_tcp_rule - 1) : 0;
 306                if (pf->fd_tcp_rule == 0) {
 307                        if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
 308                            I40E_DEBUG_FD & pf->hw.debug_mask)
 309                                dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
 310                        pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
 311                }
 312        }
 313
 314        fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
 315        ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
 316
 317        if (ret) {
 318                dev_info(&pf->pdev->dev,
 319                         "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
 320                         fd_data->pctype, fd_data->fd_id, ret);
 321                err = true;
 322        } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
 323                if (add)
  324                        dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
 325                                 fd_data->pctype, fd_data->fd_id);
 326                else
 327                        dev_info(&pf->pdev->dev,
 328                                 "Filter deleted for PCTYPE %d loc = %d\n",
 329                                 fd_data->pctype, fd_data->fd_id);
 330        }
 331
 332        if (err)
 333                kfree(raw_packet);
 334
 335        return err ? -EOPNOTSUPP : 0;
 336}
 337
 338/**
 339 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
 340 * a specific flow spec
 341 * @vsi: pointer to the targeted VSI
 342 * @fd_data: the flow director data required for the FDir descriptor
 343 * @add: true adds a filter, false removes it
 344 *
 345 * Returns 0 if the filters were successfully added or removed
 346 **/
 347static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
 348                                    struct i40e_fdir_filter *fd_data,
 349                                    bool add)
 350{
 351        return -EOPNOTSUPP;
 352}
 353
 354#define I40E_IP_DUMMY_PACKET_LEN 34
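/* 34 bytes = 14-byte Ethernet + 20-byte IPv4 header, no L4 header */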
 355/**
 356 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
 357 * a specific flow spec
 358 * @vsi: pointer to the targeted VSI
 359 * @fd_data: the flow director data required for the FDir descriptor
 360 * @add: true adds a filter, false removes it
 361 *
 362 * Returns 0 if the filters were successfully added or removed
 363 **/
 364static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
 365                                  struct i40e_fdir_filter *fd_data,
 366                                  bool add)
 367{
 368        struct i40e_pf *pf = vsi->back;
 369        struct iphdr *ip;
 370        bool err = false;
 371        u8 *raw_packet;
 372        int ret;
 373        int i;
 374        static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
 375                0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
 376                0, 0, 0, 0};
 377
 378        for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
 379             i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
 380                raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
 381                if (!raw_packet)
 382                        return -ENOMEM;
 383                memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
 384                ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
 385
 386                ip->saddr = fd_data->src_ip[0];
 387                ip->daddr = fd_data->dst_ip[0];
 388                ip->protocol = 0;
 389
 390                fd_data->pctype = i;
 391                ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
 392
 393                if (ret) {
 394                        dev_info(&pf->pdev->dev,
 395                                 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
 396                                 fd_data->pctype, fd_data->fd_id, ret);
 397                        err = true;
 398                } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
 399                        if (add)
 400                                dev_info(&pf->pdev->dev,
 401                                         "Filter OK for PCTYPE %d loc = %d\n",
 402                                         fd_data->pctype, fd_data->fd_id);
 403                        else
 404                                dev_info(&pf->pdev->dev,
 405                                         "Filter deleted for PCTYPE %d loc = %d\n",
 406                                         fd_data->pctype, fd_data->fd_id);
 407                }
 408        }
 409
 410        if (err)
 411                kfree(raw_packet);
 412
 413        return err ? -EOPNOTSUPP : 0;
 414}
 415
 416/**
 417 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
 418 * @vsi: pointer to the targeted VSI
  419 * @input: flow director filter data to add or delete
 420 * @add: true adds a filter, false removes it
 421 *
 422 **/
 423int i40e_add_del_fdir(struct i40e_vsi *vsi,
 424                      struct i40e_fdir_filter *input, bool add)
 425{
 426        struct i40e_pf *pf = vsi->back;
 427        int ret;
 428
 429        switch (input->flow_type & ~FLOW_EXT) {
 430        case TCP_V4_FLOW:
 431                ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
 432                break;
 433        case UDP_V4_FLOW:
 434                ret = i40e_add_del_fdir_udpv4(vsi, input, add);
 435                break;
 436        case SCTP_V4_FLOW:
 437                ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
 438                break;
 439        case IPV4_FLOW:
 440                ret = i40e_add_del_fdir_ipv4(vsi, input, add);
 441                break;
 442        case IP_USER_FLOW:
 443                switch (input->ip4_proto) {
 444                case IPPROTO_TCP:
 445                        ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
 446                        break;
 447                case IPPROTO_UDP:
 448                        ret = i40e_add_del_fdir_udpv4(vsi, input, add);
 449                        break;
 450                case IPPROTO_SCTP:
 451                        ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
 452                        break;
 453                default:
 454                        ret = i40e_add_del_fdir_ipv4(vsi, input, add);
 455                        break;
 456                }
 457                break;
 458        default:
 459                dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
 460                         input->flow_type);
 461                ret = -EINVAL;
 462        }
 463
 464        /* The buffer allocated here is freed by the i40e_clean_tx_ring() */
 465        return ret;
 466}
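/* i40e_add_del_fdir() is typically reached from the ethtool ntuple path
 * (i40e_add_fdir_ethtool() in i40e_ethtool.c), which builds the
 * i40e_fdir_filter from the user's ethtool_rx_flow_spec.
 */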
 467
 468/**
 469 * i40e_fd_handle_status - check the Programming Status for FD
 470 * @rx_ring: the Rx ring for this descriptor
 471 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
 472 * @prog_id: the id originally used for programming
 473 *
  474 * This is used to verify whether the FD programming or invalidation
  475 * requested by SW succeeded in HW, and to take action accordingly.
 476 **/
 477static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 478                                  union i40e_rx_desc *rx_desc, u8 prog_id)
 479{
 480        struct i40e_pf *pf = rx_ring->vsi->back;
 481        struct pci_dev *pdev = pf->pdev;
 482        u32 fcnt_prog, fcnt_avail;
 483        u32 error;
 484        u64 qw;
 485
 486        qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
 487        error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
 488                I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
 489
 490        if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
 491                pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
 492                if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
 493                    (I40E_DEBUG_FD & pf->hw.debug_mask))
 494                        dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
 495                                 pf->fd_inv);
 496
 497                /* Check if the programming error is for ATR.
 498                 * If so, auto disable ATR and set a state for
  499                 * flush in progress. The next time we come here, if a flush is
  500                 * in progress, do nothing; once the flush is complete the state
  501                 * will be cleared.
 502                 */
 503                if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
 504                        return;
 505
 506                pf->fd_add_err++;
 507                /* store the current atr filter count */
 508                pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
 509
 510                if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
 511                    (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
 512                        pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
 513                        set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
 514                }
 515
 516                /* filter programming failed most likely due to table full */
 517                fcnt_prog = i40e_get_global_fd_count(pf);
 518                fcnt_avail = pf->fdir_pf_filter_count;
 519                /* If ATR is running fcnt_prog can quickly change,
 520                 * if we are very close to full, it makes sense to disable
 521                 * FD ATR/SB and then re-enable it when there is room.
 522                 */
 523                if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
 524                        if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
 525                            !(pf->auto_disable_flags &
 526                                     I40E_FLAG_FD_SB_ENABLED)) {
 527                                if (I40E_DEBUG_FD & pf->hw.debug_mask)
 528                                        dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
 529                                pf->auto_disable_flags |=
 530                                                        I40E_FLAG_FD_SB_ENABLED;
 531                        }
 532                }
 533        } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
 534                if (I40E_DEBUG_FD & pf->hw.debug_mask)
 535                        dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
 536                                 rx_desc->wb.qword0.hi_dword.fd_id);
 537        }
 538}
 539
 540/**
 541 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
 542 * @ring:      the ring that owns the buffer
 543 * @tx_buffer: the buffer to free
 544 **/
 545static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
 546                                            struct i40e_tx_buffer *tx_buffer)
 547{
 548        if (tx_buffer->skb) {
 549                if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
 550                        kfree(tx_buffer->raw_buf);
 551                else
 552                        dev_kfree_skb_any(tx_buffer->skb);
 553                if (dma_unmap_len(tx_buffer, len))
 554                        dma_unmap_single(ring->dev,
 555                                         dma_unmap_addr(tx_buffer, dma),
 556                                         dma_unmap_len(tx_buffer, len),
 557                                         DMA_TO_DEVICE);
 558        } else if (dma_unmap_len(tx_buffer, len)) {
 559                dma_unmap_page(ring->dev,
 560                               dma_unmap_addr(tx_buffer, dma),
 561                               dma_unmap_len(tx_buffer, len),
 562                               DMA_TO_DEVICE);
 563        }
 564
 565        tx_buffer->next_to_watch = NULL;
 566        tx_buffer->skb = NULL;
 567        dma_unmap_len_set(tx_buffer, len, 0);
 568        /* tx_buffer must be completely set up in the transmit path */
 569}
 570
 571/**
 572 * i40e_clean_tx_ring - Free any empty Tx buffers
 573 * @tx_ring: ring to be cleaned
 574 **/
 575void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
 576{
 577        unsigned long bi_size;
 578        u16 i;
 579
 580        /* ring already cleared, nothing to do */
 581        if (!tx_ring->tx_bi)
 582                return;
 583
 584        /* Free all the Tx ring sk_buffs */
 585        for (i = 0; i < tx_ring->count; i++)
 586                i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
 587
 588        bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
 589        memset(tx_ring->tx_bi, 0, bi_size);
 590
 591        /* Zero out the descriptor ring */
 592        memset(tx_ring->desc, 0, tx_ring->size);
 593
 594        tx_ring->next_to_use = 0;
 595        tx_ring->next_to_clean = 0;
 596
 597        if (!tx_ring->netdev)
 598                return;
 599
 600        /* cleanup Tx queue statistics */
 601        netdev_tx_reset_queue(txring_txq(tx_ring));
 602}
 603
 604/**
 605 * i40e_free_tx_resources - Free Tx resources per queue
 606 * @tx_ring: Tx descriptor ring for a specific queue
 607 *
 608 * Free all transmit software resources
 609 **/
 610void i40e_free_tx_resources(struct i40e_ring *tx_ring)
 611{
 612        i40e_clean_tx_ring(tx_ring);
 613        kfree(tx_ring->tx_bi);
 614        tx_ring->tx_bi = NULL;
 615
 616        if (tx_ring->desc) {
 617                dma_free_coherent(tx_ring->dev, tx_ring->size,
 618                                  tx_ring->desc, tx_ring->dma);
 619                tx_ring->desc = NULL;
 620        }
 621}
 622
 623/**
 624 * i40e_get_tx_pending - how many tx descriptors not processed
  625 * @ring: the ring of descriptors
 626 * @in_sw: is tx_pending being checked in SW or HW
 627 *
 628 * Since there is no access to the ring head register
 629 * in XL710, we need to use our local copies
 630 **/
 631u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
 632{
 633        u32 head, tail;
 634
 635        if (!in_sw)
 636                head = i40e_get_head(ring);
 637        else
 638                head = ring->next_to_clean;
 639        tail = readl(ring->tail);
 640
 641        if (head != tail)
 642                return (head < tail) ?
 643                        tail - head : (tail + ring->count - head);
 644
 645        return 0;
 646}
 647
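/* When fewer than WB_STRIDE + 1 descriptors are still awaiting a write-back,
 * i40e_clean_tx_irq() arms a forced write-back so they are not left pending
 * in WB_ON_ITR mode.
 */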
 648#define WB_STRIDE 0x3
 649
 650/**
 651 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 652 * @vsi: the VSI we care about
 653 * @tx_ring: Tx ring to clean
 654 * @napi_budget: Used to determine if we are in netpoll
 655 *
  656 * Returns true if there's any budget left (i.e. the clean is finished)
 657 **/
 658static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 659                              struct i40e_ring *tx_ring, int napi_budget)
 660{
 661        u16 i = tx_ring->next_to_clean;
 662        struct i40e_tx_buffer *tx_buf;
 663        struct i40e_tx_desc *tx_head;
 664        struct i40e_tx_desc *tx_desc;
 665        unsigned int total_bytes = 0, total_packets = 0;
 666        unsigned int budget = vsi->work_limit;
 667
 668        tx_buf = &tx_ring->tx_bi[i];
 669        tx_desc = I40E_TX_DESC(tx_ring, i);
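        /* track i as a negative offset from the end of the ring so that the
         * wrap check below reduces to (!i)
         */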
 670        i -= tx_ring->count;
 671
 672        tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
 673
 674        do {
 675                struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 676
 677                /* if next_to_watch is not set then there is no work pending */
 678                if (!eop_desc)
 679                        break;
 680
 681                /* prevent any other reads prior to eop_desc */
 682                read_barrier_depends();
 683
 684                /* we have caught up to head, no work left to do */
 685                if (tx_head == tx_desc)
 686                        break;
 687
 688                /* clear next_to_watch to prevent false hangs */
 689                tx_buf->next_to_watch = NULL;
 690
 691                /* update the statistics for this packet */
 692                total_bytes += tx_buf->bytecount;
 693                total_packets += tx_buf->gso_segs;
 694
 695                /* free the skb */
 696                napi_consume_skb(tx_buf->skb, napi_budget);
 697
 698                /* unmap skb header data */
 699                dma_unmap_single(tx_ring->dev,
 700                                 dma_unmap_addr(tx_buf, dma),
 701                                 dma_unmap_len(tx_buf, len),
 702                                 DMA_TO_DEVICE);
 703
 704                /* clear tx_buffer data */
 705                tx_buf->skb = NULL;
 706                dma_unmap_len_set(tx_buf, len, 0);
 707
 708                /* unmap remaining buffers */
 709                while (tx_desc != eop_desc) {
 710
 711                        tx_buf++;
 712                        tx_desc++;
 713                        i++;
 714                        if (unlikely(!i)) {
 715                                i -= tx_ring->count;
 716                                tx_buf = tx_ring->tx_bi;
 717                                tx_desc = I40E_TX_DESC(tx_ring, 0);
 718                        }
 719
 720                        /* unmap any remaining paged data */
 721                        if (dma_unmap_len(tx_buf, len)) {
 722                                dma_unmap_page(tx_ring->dev,
 723                                               dma_unmap_addr(tx_buf, dma),
 724                                               dma_unmap_len(tx_buf, len),
 725                                               DMA_TO_DEVICE);
 726                                dma_unmap_len_set(tx_buf, len, 0);
 727                        }
 728                }
 729
 730                /* move us one more past the eop_desc for start of next pkt */
 731                tx_buf++;
 732                tx_desc++;
 733                i++;
 734                if (unlikely(!i)) {
 735                        i -= tx_ring->count;
 736                        tx_buf = tx_ring->tx_bi;
 737                        tx_desc = I40E_TX_DESC(tx_ring, 0);
 738                }
 739
 740                prefetch(tx_desc);
 741
 742                /* update budget accounting */
 743                budget--;
 744        } while (likely(budget));
 745
 746        i += tx_ring->count;
 747        tx_ring->next_to_clean = i;
 748        u64_stats_update_begin(&tx_ring->syncp);
 749        tx_ring->stats.bytes += total_bytes;
 750        tx_ring->stats.packets += total_packets;
 751        u64_stats_update_end(&tx_ring->syncp);
 752        tx_ring->q_vector->tx.total_bytes += total_bytes;
 753        tx_ring->q_vector->tx.total_packets += total_packets;
 754
 755        if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
  756                /* check to see if there are < 4 descriptors
  757                 * waiting to be written back; if so, kick the hardware to force
  758                 * them to be written back in case we stay in NAPI.
  759                 * In this mode the X722 does not enable an interrupt.
 760                 */
 761                unsigned int j = i40e_get_tx_pending(tx_ring, false);
 762
 763                if (budget &&
 764                    ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
 765                    !test_bit(__I40E_DOWN, &vsi->state) &&
 766                    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
 767                        tx_ring->arm_wb = true;
 768        }
 769
 770        /* notify netdev of completed buffers */
 771        netdev_tx_completed_queue(txring_txq(tx_ring),
 772                                  total_packets, total_bytes);
 773
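/* wake the queue once at least two worst-case frames' worth of descriptors
 * (DESC_NEEDED each) are free again
 */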
 774#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
 775        if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
 776                     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
 777                /* Make sure that anybody stopping the queue after this
 778                 * sees the new next_to_clean.
 779                 */
 780                smp_mb();
 781                if (__netif_subqueue_stopped(tx_ring->netdev,
 782                                             tx_ring->queue_index) &&
 783                   !test_bit(__I40E_DOWN, &vsi->state)) {
 784                        netif_wake_subqueue(tx_ring->netdev,
 785                                            tx_ring->queue_index);
 786                        ++tx_ring->tx_stats.restart_queue;
 787                }
 788        }
 789
 790        return !!budget;
 791}
 792
 793/**
 794 * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
 795 * @vsi: the VSI we care about
 796 * @q_vector: the vector on which to enable writeback
 797 *
 798 **/
 799static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
 800                                  struct i40e_q_vector *q_vector)
 801{
 802        u16 flags = q_vector->tx.ring[0].flags;
 803        u32 val;
 804
 805        if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
 806                return;
 807
 808        if (q_vector->arm_wb_state)
 809                return;
 810
 811        if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
 812                val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
 813                      I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
 814
 815                wr32(&vsi->back->hw,
 816                     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
 817                     val);
 818        } else {
 819                val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
 820                      I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
 821
 822                wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
 823        }
 824        q_vector->arm_wb_state = true;
 825}
 826
 827/**
 828 * i40e_force_wb - Issue SW Interrupt so HW does a wb
 829 * @vsi: the VSI we care about
  830 * @q_vector: the vector on which to force writeback
 831 *
 832 **/
 833void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
 834{
 835        if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
 836                u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
 837                          I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
 838                          I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
 839                          I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
 840                          /* allow 00 to be written to the index */
 841
 842                wr32(&vsi->back->hw,
 843                     I40E_PFINT_DYN_CTLN(q_vector->v_idx +
 844                                         vsi->base_vector - 1), val);
 845        } else {
 846                u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
 847                          I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
 848                          I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
 849                          I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
 850                        /* allow 00 to be written to the index */
 851
 852                wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
 853        }
 854}
 855
 856/**
 857 * i40e_set_new_dynamic_itr - Find new ITR level
 858 * @rc: structure containing ring performance data
 859 *
 860 * Returns true if ITR changed, false if not
 861 *
 862 * Stores a new ITR value based on packets and byte counts during
 863 * the last interrupt.  The advantage of per interrupt computation
 864 * is faster updates and more accurate ITR for the current traffic
 865 * pattern.  Constants in this function were computed based on
 866 * theoretical maximum wire speed and thresholds were set based on
 867 * testing data as well as attempting to minimize response time
 868 * while increasing bulk throughput.
 869 **/
 870static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 871{
 872        enum i40e_latency_range new_latency_range = rc->latency_range;
 873        struct i40e_q_vector *qv = rc->ring->q_vector;
 874        u32 new_itr = rc->itr;
 875        int bytes_per_int;
 876        int usecs;
 877
 878        if (rc->total_packets == 0 || !rc->itr)
 879                return false;
 880
  881        /* simple throttle rate management
 882         *   0-10MB/s   lowest (50000 ints/s)
 883         *  10-20MB/s   low    (20000 ints/s)
 884         *  20-1249MB/s bulk   (18000 ints/s)
 885         *  > 40000 Rx packets per second (8000 ints/s)
 886         *
  887         * The math works out because the divisor is in 10^(-6), which
  888         * turns the bytes/us input value into MB/s values. Be sure to use
  889         * usecs, as the ITR register values are written in 2 usec
  890         * increments, and to use the smoothed values that the countdown
  891         * timer gives us.
 892         */
 893        usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
 894        bytes_per_int = rc->total_bytes / usecs;
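        /* example: 3000 bytes over a 300 usec window gives bytes_per_int = 10,
         * i.e. 10 MB/s, right at the lowest/low latency boundary below
         */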
 895
 896        switch (new_latency_range) {
 897        case I40E_LOWEST_LATENCY:
 898                if (bytes_per_int > 10)
 899                        new_latency_range = I40E_LOW_LATENCY;
 900                break;
 901        case I40E_LOW_LATENCY:
 902                if (bytes_per_int > 20)
 903                        new_latency_range = I40E_BULK_LATENCY;
 904                else if (bytes_per_int <= 10)
 905                        new_latency_range = I40E_LOWEST_LATENCY;
 906                break;
 907        case I40E_BULK_LATENCY:
 908        case I40E_ULTRA_LATENCY:
 909        default:
 910                if (bytes_per_int <= 20)
 911                        new_latency_range = I40E_LOW_LATENCY;
 912                break;
 913        }
 914
 915        /* this is to adjust RX more aggressively when streaming small
 916         * packets.  The value of 40000 was picked as it is just beyond
 917         * what the hardware can receive per second if in low latency
 918         * mode.
 919         */
 920#define RX_ULTRA_PACKET_RATE 40000
 921
 922        if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
 923            (&qv->rx == rc))
 924                new_latency_range = I40E_ULTRA_LATENCY;
 925
 926        rc->latency_range = new_latency_range;
 927
 928        switch (new_latency_range) {
 929        case I40E_LOWEST_LATENCY:
 930                new_itr = I40E_ITR_50K;
 931                break;
 932        case I40E_LOW_LATENCY:
 933                new_itr = I40E_ITR_20K;
 934                break;
 935        case I40E_BULK_LATENCY:
 936                new_itr = I40E_ITR_18K;
 937                break;
 938        case I40E_ULTRA_LATENCY:
 939                new_itr = I40E_ITR_8K;
 940                break;
 941        default:
 942                break;
 943        }
 944
 945        rc->total_bytes = 0;
 946        rc->total_packets = 0;
 947
 948        if (new_itr != rc->itr) {
 949                rc->itr = new_itr;
 950                return true;
 951        }
 952
 953        return false;
 954}
 955
 956/**
 957 * i40e_clean_programming_status - clean the programming status descriptor
 958 * @rx_ring: the rx ring that has this descriptor
 959 * @rx_desc: the rx descriptor written back by HW
 960 *
  961 * Flow director should handle FD_FILTER_STATUS to check whether its filter
  962 * programming succeeded and take action accordingly. FCoE should handle its
  963 * context/filter programming/invalidation status and take action.
 964 *
 965 **/
 966static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
 967                                          union i40e_rx_desc *rx_desc)
 968{
 969        u64 qw;
 970        u8 id;
 971
 972        qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
 973        id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
 974                  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
 975
 976        if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
 977                i40e_fd_handle_status(rx_ring, rx_desc, id);
 978#ifdef I40E_FCOE
 979        else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
 980                 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
 981                i40e_fcoe_handle_status(rx_ring, rx_desc, id);
 982#endif
 983}
 984
 985/**
 986 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
 987 * @tx_ring: the tx ring to set up
 988 *
 989 * Return 0 on success, negative on error
 990 **/
 991int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
 992{
 993        struct device *dev = tx_ring->dev;
 994        int bi_size;
 995
 996        if (!dev)
 997                return -ENOMEM;
 998
 999        /* warn if we are about to overwrite the pointer */
1000        WARN_ON(tx_ring->tx_bi);
1001        bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
1002        tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
1003        if (!tx_ring->tx_bi)
1004                goto err;
1005
1006        /* round up to nearest 4K */
1007        tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
 1008        /* add a u32 for head writeback; the alignment below then
 1009         * guarantees the ring is at least one cache line in size
1010         */
1011        tx_ring->size += sizeof(u32);
1012        tx_ring->size = ALIGN(tx_ring->size, 4096);
1013        tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1014                                           &tx_ring->dma, GFP_KERNEL);
1015        if (!tx_ring->desc) {
1016                dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1017                         tx_ring->size);
1018                goto err;
1019        }
1020
1021        tx_ring->next_to_use = 0;
1022        tx_ring->next_to_clean = 0;
1023        return 0;
1024
1025err:
1026        kfree(tx_ring->tx_bi);
1027        tx_ring->tx_bi = NULL;
1028        return -ENOMEM;
1029}
1030
1031/**
1032 * i40e_clean_rx_ring - Free Rx buffers
1033 * @rx_ring: ring to be cleaned
1034 **/
1035void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1036{
1037        struct device *dev = rx_ring->dev;
1038        unsigned long bi_size;
1039        u16 i;
1040
1041        /* ring already cleared, nothing to do */
1042        if (!rx_ring->rx_bi)
1043                return;
1044
1045        /* Free all the Rx ring sk_buffs */
1046        for (i = 0; i < rx_ring->count; i++) {
1047                struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
1048
1049                if (rx_bi->skb) {
1050                        dev_kfree_skb(rx_bi->skb);
1051                        rx_bi->skb = NULL;
1052                }
1053                if (!rx_bi->page)
1054                        continue;
1055
1056                dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE, DMA_FROM_DEVICE);
1057                __free_pages(rx_bi->page, 0);
1058
1059                rx_bi->page = NULL;
1060                rx_bi->page_offset = 0;
1061        }
1062
1063        bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1064        memset(rx_ring->rx_bi, 0, bi_size);
1065
1066        /* Zero out the descriptor ring */
1067        memset(rx_ring->desc, 0, rx_ring->size);
1068
1069        rx_ring->next_to_alloc = 0;
1070        rx_ring->next_to_clean = 0;
1071        rx_ring->next_to_use = 0;
1072}
1073
1074/**
1075 * i40e_free_rx_resources - Free Rx resources
1076 * @rx_ring: ring to clean the resources from
1077 *
1078 * Free all receive software resources
1079 **/
1080void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1081{
1082        i40e_clean_rx_ring(rx_ring);
1083        kfree(rx_ring->rx_bi);
1084        rx_ring->rx_bi = NULL;
1085
1086        if (rx_ring->desc) {
1087                dma_free_coherent(rx_ring->dev, rx_ring->size,
1088                                  rx_ring->desc, rx_ring->dma);
1089                rx_ring->desc = NULL;
1090        }
1091}
1092
1093/**
1094 * i40e_setup_rx_descriptors - Allocate Rx descriptors
1095 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1096 *
1097 * Returns 0 on success, negative on failure
1098 **/
1099int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1100{
1101        struct device *dev = rx_ring->dev;
1102        int bi_size;
1103
1104        /* warn if we are about to overwrite the pointer */
1105        WARN_ON(rx_ring->rx_bi);
1106        bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1107        rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1108        if (!rx_ring->rx_bi)
1109                goto err;
1110
1111        u64_stats_init(&rx_ring->syncp);
1112
1113        /* Round up to nearest 4K */
1114        rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1115        rx_ring->size = ALIGN(rx_ring->size, 4096);
1116        rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1117                                           &rx_ring->dma, GFP_KERNEL);
1118
1119        if (!rx_ring->desc) {
1120                dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1121                         rx_ring->size);
1122                goto err;
1123        }
1124
1125        rx_ring->next_to_alloc = 0;
1126        rx_ring->next_to_clean = 0;
1127        rx_ring->next_to_use = 0;
1128
1129        return 0;
1130err:
1131        kfree(rx_ring->rx_bi);
1132        rx_ring->rx_bi = NULL;
1133        return -ENOMEM;
1134}
1135
1136/**
1137 * i40e_release_rx_desc - Store the new tail and head values
1138 * @rx_ring: ring to bump
 1139 * @val: new tail index (next_to_use) to write to hardware
1140 **/
1141static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1142{
1143        rx_ring->next_to_use = val;
1144
1145        /* update next to alloc since we have filled the ring */
1146        rx_ring->next_to_alloc = val;
1147
1148        /* Force memory writes to complete before letting h/w
1149         * know there are new descriptors to fetch.  (Only
1150         * applicable for weak-ordered memory model archs,
1151         * such as IA-64).
1152         */
1153        wmb();
1154        writel(val, rx_ring->tail);
1155}
1156
1157/**
1158 * i40e_alloc_mapped_page - recycle or make a new page
1159 * @rx_ring: ring to use
1160 * @bi: rx_buffer struct to modify
1161 *
1162 * Returns true if the page was successfully allocated or
1163 * reused.
1164 **/
1165static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
1166                                   struct i40e_rx_buffer *bi)
1167{
1168        struct page *page = bi->page;
1169        dma_addr_t dma;
1170
1171        /* since we are recycling buffers we should seldom need to alloc */
1172        if (likely(page)) {
1173                rx_ring->rx_stats.page_reuse_count++;
1174                return true;
1175        }
1176
1177        /* alloc new page for storage */
1178        page = dev_alloc_page();
1179        if (unlikely(!page)) {
1180                rx_ring->rx_stats.alloc_page_failed++;
1181                return false;
1182        }
1183
1184        /* map page for use */
1185        dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
1186
1187        /* if mapping failed free memory back to system since
1188         * there isn't much point in holding memory we can't use
1189         */
1190        if (dma_mapping_error(rx_ring->dev, dma)) {
1191                __free_pages(page, 0);
1192                rx_ring->rx_stats.alloc_page_failed++;
1193                return false;
1194        }
1195
1196        bi->dma = dma;
1197        bi->page = page;
1198        bi->page_offset = 0;
1199
1200        return true;
1201}
1202
1203/**
1204 * i40e_receive_skb - Send a completed packet up the stack
1205 * @rx_ring:  rx ring in play
1206 * @skb: packet to send up
1207 * @vlan_tag: vlan tag for packet
1208 **/
1209static void i40e_receive_skb(struct i40e_ring *rx_ring,
1210                             struct sk_buff *skb, u16 vlan_tag)
1211{
1212        struct i40e_q_vector *q_vector = rx_ring->q_vector;
1213
1214        if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
1215            (vlan_tag & VLAN_VID_MASK))
1216                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1217
1218        napi_gro_receive(&q_vector->napi, skb);
1219}
1220
1221/**
1222 * i40e_alloc_rx_buffers - Replace used receive buffers
1223 * @rx_ring: ring to place buffers on
1224 * @cleaned_count: number of buffers to replace
1225 *
1226 * Returns false if all allocations were successful, true if any fail
1227 **/
1228bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
1229{
1230        u16 ntu = rx_ring->next_to_use;
1231        union i40e_rx_desc *rx_desc;
1232        struct i40e_rx_buffer *bi;
1233
1234        /* do nothing if no valid netdev defined */
1235        if (!rx_ring->netdev || !cleaned_count)
1236                return false;
1237
1238        rx_desc = I40E_RX_DESC(rx_ring, ntu);
1239        bi = &rx_ring->rx_bi[ntu];
1240
1241        do {
1242                if (!i40e_alloc_mapped_page(rx_ring, bi))
1243                        goto no_buffers;
1244
1245                /* Refresh the desc even if buffer_addrs didn't change
1246                 * because each write-back erases this info.
1247                 */
1248                rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
1249                rx_desc->read.hdr_addr = 0;
1250
1251                rx_desc++;
1252                bi++;
1253                ntu++;
1254                if (unlikely(ntu == rx_ring->count)) {
1255                        rx_desc = I40E_RX_DESC(rx_ring, 0);
1256                        bi = rx_ring->rx_bi;
1257                        ntu = 0;
1258                }
1259
1260                /* clear the status bits for the next_to_use descriptor */
1261                rx_desc->wb.qword1.status_error_len = 0;
1262
1263                cleaned_count--;
1264        } while (cleaned_count);
1265
1266        if (rx_ring->next_to_use != ntu)
1267                i40e_release_rx_desc(rx_ring, ntu);
1268
1269        return false;
1270
1271no_buffers:
1272        if (rx_ring->next_to_use != ntu)
1273                i40e_release_rx_desc(rx_ring, ntu);
1274
1275        /* make sure to come back via polling to try again after
1276         * allocation failure
1277         */
1278        return true;
1279}
1280
1281/**
1282 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1283 * @vsi: the VSI we care about
1284 * @skb: skb currently being received and modified
1285 * @rx_desc: the receive descriptor
1286 *
1287 * skb->protocol must be set before this function is called
1288 **/
1289static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1290                                    struct sk_buff *skb,
1291                                    union i40e_rx_desc *rx_desc)
1292{
1293        struct i40e_rx_ptype_decoded decoded;
1294        u32 rx_error, rx_status;
1295        bool ipv4, ipv6;
1296        u8 ptype;
1297        u64 qword;
1298
1299        qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1300        ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
1301        rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1302                   I40E_RXD_QW1_ERROR_SHIFT;
1303        rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1304                    I40E_RXD_QW1_STATUS_SHIFT;
1305        decoded = decode_rx_desc_ptype(ptype);
1306
1307        skb->ip_summed = CHECKSUM_NONE;
1308
1309        skb_checksum_none_assert(skb);
1310
1311        /* Rx csum enabled and ip headers found? */
1312        if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1313                return;
1314
1315        /* did the hardware decode the packet and checksum? */
1316        if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1317                return;
1318
1319        /* both known and outer_ip must be set for the below code to work */
1320        if (!(decoded.known && decoded.outer_ip))
1321                return;
1322
1323        ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
1324               (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
1325        ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
1326               (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);
1327
1328        if (ipv4 &&
1329            (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1330                         BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1331                goto checksum_fail;
1332
1333        /* likely incorrect csum if alternate IP extension headers found */
1334        if (ipv6 &&
1335            rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1336                /* don't increment checksum err here, non-fatal err */
1337                return;
1338
1339        /* there was some L4 error, count error and punt packet to the stack */
1340        if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
1341                goto checksum_fail;
1342
1343        /* handle packets that were not able to be checksummed due
1344         * to arrival speed, in this case the stack can compute
1345         * the csum.
1346         */
1347        if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
1348                return;
1349
1350        /* If there is an outer header present that might contain a checksum
1351         * we need to bump the checksum level by 1 to reflect the fact that
1352         * we are indicating we validated the inner checksum.
1353         */
1354        if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT)
1355                skb->csum_level = 1;
1356
1357        /* Only report checksum unnecessary for TCP, UDP, or SCTP */
1358        switch (decoded.inner_prot) {
1359        case I40E_RX_PTYPE_INNER_PROT_TCP:
1360        case I40E_RX_PTYPE_INNER_PROT_UDP:
1361        case I40E_RX_PTYPE_INNER_PROT_SCTP:
1362                skb->ip_summed = CHECKSUM_UNNECESSARY;
 1363                /* fall through */
1364        default:
1365                break;
1366        }
1367
1368        return;
1369
1370checksum_fail:
1371        vsi->back->hw_csum_rx_error++;
1372}
1373
1374/**
1375 * i40e_ptype_to_htype - get a hash type
1376 * @ptype: the ptype value from the descriptor
1377 *
1378 * Returns a hash type to be used by skb_set_hash
1379 **/
1380static inline int i40e_ptype_to_htype(u8 ptype)
1381{
1382        struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1383
1384        if (!decoded.known)
1385                return PKT_HASH_TYPE_NONE;
1386
1387        if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1388            decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1389                return PKT_HASH_TYPE_L4;
1390        else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1391                 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1392                return PKT_HASH_TYPE_L3;
1393        else
1394                return PKT_HASH_TYPE_L2;
1395}
1396
1397/**
1398 * i40e_rx_hash - set the hash value in the skb
1399 * @ring: descriptor ring
1400 * @rx_desc: specific descriptor
1401 **/
1402static inline void i40e_rx_hash(struct i40e_ring *ring,
1403                                union i40e_rx_desc *rx_desc,
1404                                struct sk_buff *skb,
1405                                u8 rx_ptype)
1406{
1407        u32 hash;
1408        const __le64 rss_mask =
1409                cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1410                            I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1411
1412        if (!(ring->netdev->features & NETIF_F_RXHASH))
1413                return;
1414
1415        if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
1416                hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1417                skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
1418        }
1419}
1420
1421/**
1422 * i40e_process_skb_fields - Populate skb header fields from Rx descriptor
1423 * @rx_ring: rx descriptor ring packet is being transacted on
1424 * @rx_desc: pointer to the EOP Rx descriptor
1425 * @skb: pointer to current skb being populated
1426 * @rx_ptype: the packet type decoded by hardware
1427 *
1428 * This function checks the ring, descriptor, and packet information in
1429 * order to populate the hash, checksum, VLAN, protocol, and
1430 * other fields within the skb.
1431 **/
1432static inline
1433void i40e_process_skb_fields(struct i40e_ring *rx_ring,
1434                             union i40e_rx_desc *rx_desc, struct sk_buff *skb,
1435                             u8 rx_ptype)
1436{
1437        u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1438        u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1439                        I40E_RXD_QW1_STATUS_SHIFT;
1440        u32 rsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1441                   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;
1442
1443        if (unlikely(rsyn)) {
1444                i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, rsyn);
1445                rx_ring->last_rx_timestamp = jiffies;
1446        }
1447
1448        i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
1449
1450        /* modifies the skb - consumes the enet header */
1451        skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1452
1453        i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
1454
1455        skb_record_rx_queue(skb, rx_ring->queue_index);
1456}
1457
1458/**
1459 * i40e_pull_tail - i40e specific version of skb_pull_tail
1460 * @rx_ring: rx descriptor ring packet is being transacted on
1461 * @skb: pointer to current skb being adjusted
1462 *
1463 * This function is an i40e specific version of __pskb_pull_tail.  The
1464 * main difference between this version and the original function is that
1465 * this function can make several assumptions about the state of things
1466 * that allow for significant optimizations versus the standard function.
1467 * As a result we can do things like drop a frag and maintain an accurate
1468 * truesize for the skb.
1469 */
1470static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb)
1471{
1472        struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
1473        unsigned char *va;
1474        unsigned int pull_len;
1475
1476        /* it is valid to use page_address instead of kmap since we are
1477         * working with pages allocated out of the low memory pool per
1478         * alloc_page(GFP_ATOMIC)
1479         */
1480        va = skb_frag_address(frag);
1481
1482        /* we need the header to contain the greater of either ETH_HLEN or
1483         * 60 bytes if the skb->len is less than 60 for skb_pad.
1484         */
1485        pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);
1486
1487        /* align pull length to size of long to optimize memcpy performance */
1488        skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
1489
1490        /* update all of the pointers */
1491        skb_frag_size_sub(frag, pull_len);
1492        frag->page_offset += pull_len;
1493        skb->data_len -= pull_len;
1494        skb->tail += pull_len;
1495}
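
/* Worked example, assuming a plain TCP/IPv4 frame on a 64-bit kernel:
 * eth_get_headlen() finds 14 + 20 + 20 = 54 bytes of headers, the copy
 * above moves ALIGN(54, 8) = 56 bytes to keep the memcpy long-aligned, but
 * only the 54 header bytes are removed from the frag accounting, so
 * skb->len is unchanged and the skb's size accounting stays consistent.
 */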
1496
1497/**
1498 * i40e_cleanup_headers - Correct empty headers
1499 * @rx_ring: rx descriptor ring packet is being transacted on
1500 * @skb: pointer to current skb being fixed
1501 *
1502 * Also address the case where we are pulling data in on pages only
1503 * and as such no data is present in the skb header.
1504 *
1505 * In addition if skb is not at least 60 bytes we need to pad it so that
1506 * it is large enough to qualify as a valid Ethernet frame.
1507 *
1508 * Returns true if an error was encountered and skb was freed.
1509 **/
1510static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
1511{
1512        /* place header in linear portion of buffer */
1513        if (skb_is_nonlinear(skb))
1514                i40e_pull_tail(rx_ring, skb);
1515
1516        /* if eth_skb_pad returns an error the skb was freed */
1517        if (eth_skb_pad(skb))
1518                return true;
1519
1520        return false;
1521}
1522
1523/**
1524 * i40e_reuse_rx_page - page flip buffer and store it back on the ring
1525 * @rx_ring: rx descriptor ring to store buffers on
1526 * @old_buff: donor buffer to have page reused
1527 *
1528 * Synchronizes page for reuse by the adapter
1529 **/
1530static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
1531                               struct i40e_rx_buffer *old_buff)
1532{
1533        struct i40e_rx_buffer *new_buff;
1534        u16 nta = rx_ring->next_to_alloc;
1535
1536        new_buff = &rx_ring->rx_bi[nta];
1537
1538        /* update, and store next to alloc */
1539        nta++;
1540        rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
1541
1542        /* transfer page from old buffer to new buffer */
1543        *new_buff = *old_buff;
1544}
1545
1546/**
1547 * i40e_page_is_reserved - check if reuse is possible
1548 * @page: page struct to check
1549 */
1550static inline bool i40e_page_is_reserved(struct page *page)
1551{
1552        return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
1553}
1554
1555/**
1556 * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
1557 * @rx_ring: rx descriptor ring to transact packets on
1558 * @rx_buffer: buffer containing page to add
1559 * @rx_desc: descriptor containing length of buffer written by hardware
1560 * @skb: sk_buff to place the data into
1561 *
1562 * This function will add the data contained in rx_buffer->page to the skb.
1563 * This is done either through a direct copy if the data in the buffer is
1564 * less than the skb header size, otherwise it will just attach the page as
1565 * a frag to the skb.
1566 *
1567 * The function will then update the page offset if necessary and return
1568 * true if the buffer can be reused by the adapter.
1569 **/
1570static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
1571                             struct i40e_rx_buffer *rx_buffer,
1572                             union i40e_rx_desc *rx_desc,
1573                             struct sk_buff *skb)
1574{
1575        struct page *page = rx_buffer->page;
1576        u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1577        unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1578                            I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1579#if (PAGE_SIZE < 8192)
1580        unsigned int truesize = I40E_RXBUFFER_2048;
1581#else
1582        unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
1583        unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
1584#endif
1585
1586        /* will the data fit in the skb we allocated? if so, just
1587         * copy it as it is pretty small anyway
1588         */
1589        if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) {
1590                unsigned char *va = page_address(page) + rx_buffer->page_offset;
1591
1592                memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
1593
1594                /* page is not reserved, we can reuse buffer as-is */
1595                if (likely(!i40e_page_is_reserved(page)))
1596                        return true;
1597
1598                /* this page cannot be reused so discard it */
1599                __free_pages(page, 0);
1600                return false;
1601        }
1602
1603        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
1604                        rx_buffer->page_offset, size, truesize);
1605
1606        /* avoid re-using remote pages */
1607        if (unlikely(i40e_page_is_reserved(page)))
1608                return false;
1609
1610#if (PAGE_SIZE < 8192)
1611        /* if we are only owner of page we can reuse it */
1612        if (unlikely(page_count(page) != 1))
1613                return false;
1614
1615        /* flip page offset to other buffer */
1616        rx_buffer->page_offset ^= truesize;
1617#else
1618        /* move offset up to the next cache line */
1619        rx_buffer->page_offset += truesize;
1620
1621        if (rx_buffer->page_offset > last_offset)
1622                return false;
1623#endif
1624
1625        /* Even if we own the page, we are not allowed to use atomic_set()
1626         * This would break get_page_unless_zero() users.
1627         */
1628        get_page(rx_buffer->page);
1629
1630        return true;
1631}
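
/* Worked example for the PAGE_SIZE < 8192 case above: each page is split
 * into two 2048-byte halves.  If the frag just attached came from offset 0,
 * the XOR with truesize flips page_offset to 2048 (and back again next
 * time), so the half handed to the stack and the half given back to
 * hardware simply alternate; get_page() keeps the page alive until both
 * users have dropped their references.
 */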
1632
1633/**
1634 * i40e_fetch_rx_buffer - Allocate skb and populate it
1635 * @rx_ring: rx descriptor ring to transact packets on
1636 * @rx_desc: descriptor containing info written by hardware
1637 *
1638 * This function allocates an skb on the fly, and populates it with the page
1639 * data from the current receive descriptor, taking care to set up the skb
1640 * correctly, as well as handling calling the page recycle function if
1641 * necessary.
1642 */
1643static inline
1644struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring,
1645                                     union i40e_rx_desc *rx_desc)
1646{
1647        struct i40e_rx_buffer *rx_buffer;
1648        struct sk_buff *skb;
1649        struct page *page;
1650
1651        rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
1652        page = rx_buffer->page;
1653        prefetchw(page);
1654
1655        skb = rx_buffer->skb;
1656
1657        if (likely(!skb)) {
1658                void *page_addr = page_address(page) + rx_buffer->page_offset;
1659
1660                /* prefetch first cache line of first page */
1661                prefetch(page_addr);
1662#if L1_CACHE_BYTES < 128
1663                prefetch(page_addr + L1_CACHE_BYTES);
1664#endif
1665
1666                /* allocate a skb to store the frags */
1667                skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
1668                                       I40E_RX_HDR_SIZE,
1669                                       GFP_ATOMIC | __GFP_NOWARN);
1670                if (unlikely(!skb)) {
1671                        rx_ring->rx_stats.alloc_buff_failed++;
1672                        return NULL;
1673                }
1674
1675                /* we will be copying header into skb->data in
1676                 * pskb_may_pull so it is in our interest to prefetch
1677                 * it now to avoid a possible cache miss
1678                 */
1679                prefetchw(skb->data);
1680        } else {
1681                rx_buffer->skb = NULL;
1682        }
1683
1684        /* we are reusing so sync this buffer for CPU use */
1685        dma_sync_single_range_for_cpu(rx_ring->dev,
1686                                      rx_buffer->dma,
1687                                      rx_buffer->page_offset,
1688                                      I40E_RXBUFFER_2048,
1689                                      DMA_FROM_DEVICE);
1690
1691        /* pull page into skb */
1692        if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
1693                /* hand second half of page back to the ring */
1694                i40e_reuse_rx_page(rx_ring, rx_buffer);
1695                rx_ring->rx_stats.page_reuse_count++;
1696        } else {
1697                /* we are not reusing the buffer so unmap it */
1698                dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
1699                               DMA_FROM_DEVICE);
1700        }
1701
1702        /* clear contents of buffer_info */
1703        rx_buffer->page = NULL;
1704
1705        return skb;
1706}
1707
1708/**
1709 * i40e_is_non_eop - process handling of non-EOP buffers
1710 * @rx_ring: Rx ring being processed
1711 * @rx_desc: Rx descriptor for current buffer
1712 * @skb: Current socket buffer containing buffer in progress
1713 *
1714 * This function updates next to clean.  If the buffer is an EOP buffer
1715 * this function exits returning false, otherwise it will place the
1716 * sk_buff in the next buffer to be chained and return true indicating
1717 * that this is in fact a non-EOP buffer.
1718 **/
1719static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
1720                            union i40e_rx_desc *rx_desc,
1721                            struct sk_buff *skb)
1722{
1723        u32 ntc = rx_ring->next_to_clean + 1;
1724
1725        /* fetch, update, and store next to clean */
1726        ntc = (ntc < rx_ring->count) ? ntc : 0;
1727        rx_ring->next_to_clean = ntc;
1728
1729        prefetch(I40E_RX_DESC(rx_ring, ntc));
1730
1731#define staterrlen rx_desc->wb.qword1.status_error_len
1732        if (unlikely(i40e_rx_is_programming_status(le64_to_cpu(staterrlen)))) {
1733                i40e_clean_programming_status(rx_ring, rx_desc);
1734                rx_ring->rx_bi[ntc].skb = skb;
1735                return true;
1736        }
1737        /* if we are the last buffer then there is nothing else to do */
1738#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
1739        if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF)))
1740                return false;
1741
1742        /* place skb in next buffer to be received */
1743        rx_ring->rx_bi[ntc].skb = skb;
1744        rx_ring->rx_stats.non_eop_descs++;
1745
1746        return true;
1747}
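
/* Worked example, assuming 2048-byte Rx buffers: a 6000-byte jumbo frame is
 * written by hardware into three consecutive descriptors (2048 + 2048 +
 * 1904 bytes) and only the last has EOF set.  For the first two, this
 * function stashes the in-progress skb in the next rx_bi slot and returns
 * true, so i40e_clean_rx_irq() moves on without handing anything to the
 * stack yet.
 */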
1748
1749/**
1750 * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
1751 * @rx_ring: rx descriptor ring to transact packets on
1752 * @budget: Total limit on number of packets to process
1753 *
1754 * This function provides a "bounce buffer" approach to Rx interrupt
1755 * processing.  The advantage to this is that on systems that have
1756 * expensive overhead for IOMMU access this provides a means of avoiding
1757 * it by maintaining the mapping of the page to the system.
1758 *
1759 * Returns amount of work completed
1760 **/
1761static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
1762{
1763        unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1764        u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1765        bool failure = false;
1766
1767        while (likely(total_rx_packets < budget)) {
1768                union i40e_rx_desc *rx_desc;
1769                struct sk_buff *skb;
1770                u32 rx_status;
1771                u16 vlan_tag;
1772                u8 rx_ptype;
1773                u64 qword;
1774
1775                /* return some buffers to hardware, one at a time is too slow */
1776                if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1777                        failure = failure ||
1778                                  i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1779                        cleaned_count = 0;
1780                }
1781
1782                rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
1783
1784                qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1785                rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1786                           I40E_RXD_QW1_PTYPE_SHIFT;
1787                rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1788                            I40E_RXD_QW1_STATUS_SHIFT;
1789
1790                if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1791                        break;
1792
1793                /* status_error_len will always be zero for unused descriptors
1794                 * because it's cleared in cleanup, and overlaps with hdr_addr
1795                 * which is always zero because packet split isn't used.  If the
1796                 * hardware wrote DD then it will be non-zero
1797                 */
1798                if (!rx_desc->wb.qword1.status_error_len)
1799                        break;
1800
1801                /* This memory barrier is needed to keep us from reading
1802                 * any other fields out of the rx_desc until we know the
1803                 * DD bit is set.
1804                 */
1805                dma_rmb();
1806
1807                skb = i40e_fetch_rx_buffer(rx_ring, rx_desc);
1808                if (!skb)
1809                        break;
1810
1811                cleaned_count++;
1812
1813                if (i40e_is_non_eop(rx_ring, rx_desc, skb))
1814                        continue;
1815
1816                /* ERR_MASK will only have valid bits if EOP set, and
1817                 * what we are doing here is actually checking
1818                 * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
1819                 * the error field
1820                 */
1821                if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
1822                        dev_kfree_skb_any(skb);
1823                        continue;
1824                }
1825
1826                if (i40e_cleanup_headers(rx_ring, skb))
1827                        continue;
1828
1829                /* probably a little skewed due to removing CRC */
1830                total_rx_bytes += skb->len;
1831
1832                /* populate checksum, VLAN, and protocol */
1833                i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
1834
1835#ifdef I40E_FCOE
1836                if (unlikely(
1837                    i40e_rx_is_fcoe(rx_ptype) &&
1838                    !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
1839                        dev_kfree_skb_any(skb);
1840                        continue;
1841                }
1842#endif
1843
1844                vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
1845                           le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
1846
1847                i40e_receive_skb(rx_ring, skb, vlan_tag);
1848
1849                /* update budget accounting */
1850                total_rx_packets++;
1851        }
1852
1853        u64_stats_update_begin(&rx_ring->syncp);
1854        rx_ring->stats.packets += total_rx_packets;
1855        rx_ring->stats.bytes += total_rx_bytes;
1856        u64_stats_update_end(&rx_ring->syncp);
1857        rx_ring->q_vector->rx.total_packets += total_rx_packets;
1858        rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1859
1860        /* guarantee a trip back through this routine if there was a failure */
1861        return failure ? budget : total_rx_packets;
1862}
1863
1864static u32 i40e_buildreg_itr(const int type, const u16 itr)
1865{
1866        u32 val;
1867
1868        val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1869              /* Don't clear PBA because that can cause lost interrupts that
1870               * came in while we were cleaning/polling
1871               */
1872              (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1873              (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1874
1875        return val;
1876}
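
/* Example usage, as in i40e_update_enable_itr() below:
 *
 *	rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
 *
 * builds a PFINT_DYN_CTLN value that keeps INTENA set, selects the Rx ITR
 * index and programs the new interval, while
 * i40e_buildreg_itr(I40E_ITR_NONE, 0) selects the "no ITR update" index so
 * the write re-enables the interrupt without touching either interval.
 */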
1877
1878/* a small macro to shorten up some long lines */
1879#define INTREG I40E_PFINT_DYN_CTLN
1880static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx)
1881{
1882        return !!(vsi->rx_rings[idx]->rx_itr_setting);
1883}
1884
1885static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx)
1886{
1887        return !!(vsi->tx_rings[idx]->tx_itr_setting);
1888}
1889
1890/**
1891 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1892 * @vsi: the VSI we care about
1893 * @q_vector: q_vector for which itr is being updated and interrupt enabled
1894 *
1895 **/
1896static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1897                                          struct i40e_q_vector *q_vector)
1898{
1899        struct i40e_hw *hw = &vsi->back->hw;
1900        bool rx = false, tx = false;
1901        u32 rxval, txval;
1902        int vector;
1903        int idx = q_vector->v_idx;
1904        int rx_itr_setting, tx_itr_setting;
1905
1906        vector = (q_vector->v_idx + vsi->base_vector);
1907
1908        /* avoid dynamic calculation if in countdown mode OR if
1909         * all dynamic is disabled
1910         */
1911        rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1912
1913        rx_itr_setting = get_rx_itr_enabled(vsi, idx);
1914        tx_itr_setting = get_tx_itr_enabled(vsi, idx);
1915
1916        if (q_vector->itr_countdown > 0 ||
1917            (!ITR_IS_DYNAMIC(rx_itr_setting) &&
1918             !ITR_IS_DYNAMIC(tx_itr_setting))) {
1919                goto enable_int;
1920        }
1921
1922        if (ITR_IS_DYNAMIC(rx_itr_setting)) {
1923                rx = i40e_set_new_dynamic_itr(&q_vector->rx);
1924                rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
1925        }
1926
1927        if (ITR_IS_DYNAMIC(tx_itr_setting)) {
1928                tx = i40e_set_new_dynamic_itr(&q_vector->tx);
1929                txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
1930        }
1931
1932        if (rx || tx) {
1933                /* get the higher of the two ITR adjustments and
1934                 * use the same value for both ITR registers
1935                 * when in adaptive mode (Rx and/or Tx)
1936                 */
1937                u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
1938
1939                q_vector->tx.itr = q_vector->rx.itr = itr;
1940                txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
1941                tx = true;
1942                rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
1943                rx = true;
1944        }
1945
1946        /* only need to enable the interrupt once, but need
1947         * to possibly update both ITR values
1948         */
1949        if (rx) {
1950                /* set the INTENA_MSK_MASK so that this first write
1951                 * won't actually enable the interrupt, instead just
1952                 * updating the ITR (it's bit 31 PF and VF)
1953                 */
1954                rxval |= BIT(31);
1955                /* don't check _DOWN because interrupt isn't being enabled */
1956                wr32(hw, INTREG(vector - 1), rxval);
1957        }
1958
1959enable_int:
1960        if (!test_bit(__I40E_DOWN, &vsi->state))
1961                wr32(hw, INTREG(vector - 1), txval);
1962
1963        if (q_vector->itr_countdown)
1964                q_vector->itr_countdown--;
1965        else
1966                q_vector->itr_countdown = ITR_COUNTDOWN_START;
1967}
1968
1969/**
1970 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1971 * @napi: napi struct with our devices info in it
1972 * @budget: amount of work driver is allowed to do this pass, in packets
1973 *
1974 * This function will clean all queues associated with a q_vector.
1975 *
1976 * Returns the amount of work done
1977 **/
1978int i40e_napi_poll(struct napi_struct *napi, int budget)
1979{
1980        struct i40e_q_vector *q_vector =
1981                               container_of(napi, struct i40e_q_vector, napi);
1982        struct i40e_vsi *vsi = q_vector->vsi;
1983        struct i40e_ring *ring;
1984        bool clean_complete = true;
1985        bool arm_wb = false;
1986        int budget_per_ring;
1987        int work_done = 0;
1988
1989        if (test_bit(__I40E_DOWN, &vsi->state)) {
1990                napi_complete(napi);
1991                return 0;
1992        }
1993
1994        /* Clear hung_detected bit */
1995        clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
1996        /* Since the actual Tx work is minimal, we can give the Tx a larger
1997         * budget and be more aggressive about cleaning up the Tx descriptors.
1998         */
1999        i40e_for_each_ring(ring, q_vector->tx) {
2000                if (!i40e_clean_tx_irq(vsi, ring, budget)) {
2001                        clean_complete = false;
2002                        continue;
2003                }
2004                arm_wb |= ring->arm_wb;
2005                ring->arm_wb = false;
2006        }
2007
2008        /* Handle case where we are called by netpoll with a budget of 0 */
2009        if (budget <= 0)
2010                goto tx_only;
2011
2012        /* We attempt to distribute budget to each Rx queue fairly, but don't
2013         * allow the budget to go below 1 because that would exit polling early.
2014         */
2015        budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
2016
2017        i40e_for_each_ring(ring, q_vector->rx) {
2018                int cleaned = i40e_clean_rx_irq(ring, budget_per_ring);
2019
2020                work_done += cleaned;
2021                /* if we clean as many as budgeted, we must not be done */
2022                if (cleaned >= budget_per_ring)
2023                        clean_complete = false;
2024        }
2025
2026        /* If work not completed, return budget and polling will return */
2027        if (!clean_complete) {
2028tx_only:
2029                if (arm_wb) {
2030                        q_vector->tx.ring[0].tx_stats.tx_force_wb++;
2031                        i40e_enable_wb_on_itr(vsi, q_vector);
2032                }
2033                return budget;
2034        }
2035
2036        if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
2037                q_vector->arm_wb_state = false;
2038
2039        /* Work is done so exit the polling mode and re-enable the interrupt */
2040        napi_complete_done(napi, work_done);
2041        if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
2042                i40e_update_enable_itr(vsi, q_vector);
2043        } else { /* Legacy mode */
2044                i40e_irq_dynamic_enable_icr0(vsi->back, false);
2045        }
2046        return 0;
2047}
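
/* Worked example of the Rx budget split above: with the default NAPI budget
 * of 64 and a q_vector serving 4 ring pairs, each Rx ring may clean at most
 * max(64 / 4, 1) = 16 packets per poll; with more ring pairs than budget,
 * the max() still guarantees every ring a budget of at least 1 so polling
 * is not exited prematurely.
 */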
2048
2049/**
2050 * i40e_atr - Add a Flow Director ATR filter
2051 * @tx_ring:  ring to add programming descriptor to
2052 * @skb:      send buffer
2053 * @tx_flags: send tx flags
2054 **/
2055static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
2056                     u32 tx_flags)
2057{
2058        struct i40e_filter_program_desc *fdir_desc;
2059        struct i40e_pf *pf = tx_ring->vsi->back;
2060        union {
2061                unsigned char *network;
2062                struct iphdr *ipv4;
2063                struct ipv6hdr *ipv6;
2064        } hdr;
2065        struct tcphdr *th;
2066        unsigned int hlen;
2067        u32 flex_ptype, dtype_cmd;
2068        int l4_proto;
2069        u16 i;
2070
2071        /* make sure ATR is enabled */
2072        if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
2073                return;
2074
2075        if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2076                return;
2077
2078        /* if sampling is disabled do nothing */
2079        if (!tx_ring->atr_sample_rate)
2080                return;
2081
2082        /* Currently only IPv4/IPv6 with TCP is supported */
2083        if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
2084                return;
2085
2086        /* snag network header to get L4 type and address */
2087        hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ?
2088                      skb_inner_network_header(skb) : skb_network_header(skb);
2089
2090        /* Note: tx_flags gets modified to reflect inner protocols in
2091         * tx_enable_csum function if encap is enabled.
2092         */
2093        if (tx_flags & I40E_TX_FLAGS_IPV4) {
2094                /* access ihl as u8 to avoid unaligned access on ia64 */
2095                hlen = (hdr.network[0] & 0x0F) << 2;
2096                l4_proto = hdr.ipv4->protocol;
2097        } else {
2098                hlen = hdr.network - skb->data;
2099                l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
2100                hlen -= hdr.network - skb->data;
2101        }
2102
2103        if (l4_proto != IPPROTO_TCP)
2104                return;
2105
2106        th = (struct tcphdr *)(hdr.network + hlen);
2107
2108        /* Due to lack of space, no more new filters can be programmed */
2109        if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2110                return;
2111        if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2112            (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
2113                /* HW ATR eviction will take care of removing filters on FIN
2114                 * and RST packets.
2115                 */
2116                if (th->fin || th->rst)
2117                        return;
2118        }
2119
2120        tx_ring->atr_count++;
2121
2122        /* sample on all syn/fin/rst packets or once every atr sample rate */
2123        if (!th->fin &&
2124            !th->syn &&
2125            !th->rst &&
2126            (tx_ring->atr_count < tx_ring->atr_sample_rate))
2127                return;
2128
2129        tx_ring->atr_count = 0;
2130
2131        /* grab the next descriptor */
2132        i = tx_ring->next_to_use;
2133        fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2134
2135        i++;
2136        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2137
2138        flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2139                      I40E_TXD_FLTR_QW0_QINDEX_MASK;
2140        flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ?
2141                      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2142                       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2143                      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2144                       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2145
2146        flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2147
2148        dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2149
2150        dtype_cmd |= (th->fin || th->rst) ?
2151                     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2152                      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2153                     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2154                      I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2155
2156        dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2157                     I40E_TXD_FLTR_QW1_DEST_SHIFT;
2158
2159        dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2160                     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2161
2162        dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2163        if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
2164                dtype_cmd |=
2165                        ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2166                        I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2167                        I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2168        else
2169                dtype_cmd |=
2170                        ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2171                        I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2172                        I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2173
2174        if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2175            (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
2176                dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2177
2178        fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2179        fdir_desc->rsvd = cpu_to_le32(0);
2180        fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2181        fdir_desc->fd_id = cpu_to_le32(0);
2182}
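
/* Worked example, assuming the default ATR sample rate of 20: every TCP
 * SYN/FIN/RST sent on this ring programs a filter descriptor immediately
 * (FIN/RST as a REMOVE, SYN as an ADD/UPDATE), while for an established
 * flow only every 20th data packet per ring triggers one, keeping both the
 * Flow Director table churn and the Tx descriptor overhead bounded.
 */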
2183
2184/**
2185 * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2186 * @skb:     send buffer
2187 * @tx_ring: ring to send buffer on
2188 * @flags:   the tx flags to be set
2189 *
2190 * Checks the skb and sets up the corresponding generic transmit flags
2191 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
2192 *
2193 * Returns an error code to indicate the frame should be dropped upon error,
2194 * otherwise returns 0 to indicate the flags have been set properly.
2195 **/
2196#ifdef I40E_FCOE
2197inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2198                                      struct i40e_ring *tx_ring,
2199                                      u32 *flags)
2200#else
2201static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2202                                             struct i40e_ring *tx_ring,
2203                                             u32 *flags)
2204#endif
2205{
2206        __be16 protocol = skb->protocol;
2207        u32  tx_flags = 0;
2208
2209        if (protocol == htons(ETH_P_8021Q) &&
2210            !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2211                /* When HW VLAN acceleration is turned off by the user the
2212                 * stack sets the protocol to 8021q so that the driver
2213                 * can take any steps required to support the SW only
2214                 * VLAN handling.  In our case the driver doesn't need
2215                 * to take any further steps so just set the protocol
2216                 * to the encapsulated ethertype.
2217                 */
2218                skb->protocol = vlan_get_protocol(skb);
2219                goto out;
2220        }
2221
2222        /* if we have a HW VLAN tag being added, default to the HW one */
2223        if (skb_vlan_tag_present(skb)) {
2224                tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2225                tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2226        /* else if it is a SW VLAN, check the next protocol and store the tag */
2227        } else if (protocol == htons(ETH_P_8021Q)) {
2228                struct vlan_hdr *vhdr, _vhdr;
2229
2230                vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2231                if (!vhdr)
2232                        return -EINVAL;
2233
2234                protocol = vhdr->h_vlan_encapsulated_proto;
2235                tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2236                tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2237        }
2238
2239        if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2240                goto out;
2241
2242        /* Insert 802.1p priority into VLAN header */
2243        if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2244            (skb->priority != TC_PRIO_CONTROL)) {
2245                tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2246                tx_flags |= (skb->priority & 0x7) <<
2247                                I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2248                if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2249                        struct vlan_ethhdr *vhdr;
2250                        int rc;
2251
2252                        rc = skb_cow_head(skb, 0);
2253                        if (rc < 0)
2254                                return rc;
2255                        vhdr = (struct vlan_ethhdr *)skb->data;
2256                        vhdr->h_vlan_TCI = htons(tx_flags >>
2257                                                 I40E_TX_FLAGS_VLAN_SHIFT);
2258                } else {
2259                        tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2260                }
2261        }
2262
2263out:
2264        *flags = tx_flags;
2265        return 0;
2266}
2267
2268/**
2269 * i40e_tso - set up the tso context descriptor
2270 * @skb:      ptr to the skb we're sending
2271 * @hdr_len:  ptr to the size of the packet header
2272 * @cd_type_cmd_tso_mss: Quad Word 1
2273 *
2274 * Returns 0 if no TSO can happen, 1 if tso is going, or error
2275 **/
2276static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
2277{
2278        u64 cd_cmd, cd_tso_len, cd_mss;
2279        union {
2280                struct iphdr *v4;
2281                struct ipv6hdr *v6;
2282                unsigned char *hdr;
2283        } ip;
2284        union {
2285                struct tcphdr *tcp;
2286                struct udphdr *udp;
2287                unsigned char *hdr;
2288        } l4;
2289        u32 paylen, l4_offset;
2290        int err;
2291
2292        if (skb->ip_summed != CHECKSUM_PARTIAL)
2293                return 0;
2294
2295        if (!skb_is_gso(skb))
2296                return 0;
2297
2298        err = skb_cow_head(skb, 0);
2299        if (err < 0)
2300                return err;
2301
2302        ip.hdr = skb_network_header(skb);
2303        l4.hdr = skb_transport_header(skb);
2304
2305        /* initialize outer IP header fields */
2306        if (ip.v4->version == 4) {
2307                ip.v4->tot_len = 0;
2308                ip.v4->check = 0;
2309        } else {
2310                ip.v6->payload_len = 0;
2311        }
2312
2313        if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
2314                                         SKB_GSO_GRE_CSUM |
2315                                         SKB_GSO_IPXIP4 |
2316                                         SKB_GSO_IPXIP6 |
2317                                         SKB_GSO_UDP_TUNNEL |
2318                                         SKB_GSO_UDP_TUNNEL_CSUM)) {
2319                if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
2320                    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
2321                        l4.udp->len = 0;
2322
2323                        /* determine offset of outer transport header */
2324                        l4_offset = l4.hdr - skb->data;
2325
2326                        /* remove payload length from outer checksum */
2327                        paylen = skb->len - l4_offset;
2328                        csum_replace_by_diff(&l4.udp->check, htonl(paylen));
2329                }
2330
2331                /* reset pointers to inner headers */
2332                ip.hdr = skb_inner_network_header(skb);
2333                l4.hdr = skb_inner_transport_header(skb);
2334
2335                /* initialize inner IP header fields */
2336                if (ip.v4->version == 4) {
2337                        ip.v4->tot_len = 0;
2338                        ip.v4->check = 0;
2339                } else {
2340                        ip.v6->payload_len = 0;
2341                }
2342        }
2343
2344        /* determine offset of inner transport header */
2345        l4_offset = l4.hdr - skb->data;
2346
2347        /* remove payload length from inner checksum */
2348        paylen = skb->len - l4_offset;
2349        csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
2350
2351        /* compute length of segmentation header */
2352        *hdr_len = (l4.tcp->doff * 4) + l4_offset;
2353
2354        /* find the field values */
2355        cd_cmd = I40E_TX_CTX_DESC_TSO;
2356        cd_tso_len = skb->len - *hdr_len;
2357        cd_mss = skb_shinfo(skb)->gso_size;
2358        *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2359                                (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2360                                (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2361        return 1;
2362}
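
/* Worked example, assuming a non-tunneled TCP/IPv4 TSO skb with no IP or
 * TCP options: l4_offset = 14 + 20 = 34 and doff = 5, so *hdr_len becomes
 * 5 * 4 + 34 = 54.  For skb->len = 65466 and gso_size = 1448 the context
 * descriptor then carries cd_tso_len = 65466 - 54 = 65412 payload bytes and
 * an MSS of 1448, and csum_replace_by_diff() has already removed those
 * payload bytes from the pseudo-header checksum so hardware can account for
 * each segment's length itself.
 */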
2363
2364/**
2365 * i40e_tsyn - set up the tsyn context descriptor
2366 * @tx_ring:  ptr to the ring to send
2367 * @skb:      ptr to the skb we're sending
2368 * @tx_flags: the collected send information
2369 * @cd_type_cmd_tso_mss: Quad Word 1
2370 *
2371 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2372 **/
2373static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2374                     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2375{
2376        struct i40e_pf *pf;
2377
2378        if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2379                return 0;
2380
2381        /* Tx timestamps cannot be sampled when doing TSO */
2382        if (tx_flags & I40E_TX_FLAGS_TSO)
2383                return 0;
2384
2385        /* only timestamp the outbound packet if the user has requested it and
2386         * we are not already transmitting a packet to be timestamped
2387         */
2388        pf = i40e_netdev_to_pf(tx_ring->netdev);
2389        if (!(pf->flags & I40E_FLAG_PTP))
2390                return 0;
2391
2392        if (pf->ptp_tx &&
2393            !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2394                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2395                pf->ptp_tx_skb = skb_get(skb);
2396        } else {
2397                return 0;
2398        }
2399
2400        *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2401                                I40E_TXD_CTX_QW1_CMD_SHIFT;
2402
2403        return 1;
2404}
2405
2406/**
2407 * i40e_tx_enable_csum - Enable Tx checksum offloads
2408 * @skb: send buffer
2409 * @tx_flags: pointer to Tx flags currently set
2410 * @td_cmd: Tx descriptor command bits to set
2411 * @td_offset: Tx descriptor header offsets to set
2412 * @tx_ring: Tx descriptor ring
2413 * @cd_tunneling: ptr to context desc bits
2414 **/
2415static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2416                               u32 *td_cmd, u32 *td_offset,
2417                               struct i40e_ring *tx_ring,
2418                               u32 *cd_tunneling)
2419{
2420        union {
2421                struct iphdr *v4;
2422                struct ipv6hdr *v6;
2423                unsigned char *hdr;
2424        } ip;
2425        union {
2426                struct tcphdr *tcp;
2427                struct udphdr *udp;
2428                unsigned char *hdr;
2429        } l4;
2430        unsigned char *exthdr;
2431        u32 offset, cmd = 0;
2432        __be16 frag_off;
2433        u8 l4_proto = 0;
2434
2435        if (skb->ip_summed != CHECKSUM_PARTIAL)
2436                return 0;
2437
2438        ip.hdr = skb_network_header(skb);
2439        l4.hdr = skb_transport_header(skb);
2440
2441        /* compute outer L2 header size */
2442        offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
2443
2444        if (skb->encapsulation) {
2445                u32 tunnel = 0;
2446                /* define outer network header type */
2447                if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2448                        tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2449                                  I40E_TX_CTX_EXT_IP_IPV4 :
2450                                  I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2451
2452                        l4_proto = ip.v4->protocol;
2453                } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2454                        tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
2455
2456                        exthdr = ip.hdr + sizeof(*ip.v6);
2457                        l4_proto = ip.v6->nexthdr;
2458                        if (l4.hdr != exthdr)
2459                                ipv6_skip_exthdr(skb, exthdr - skb->data,
2460                                                 &l4_proto, &frag_off);
2461                }
2462
2463                /* define outer transport */
2464                switch (l4_proto) {
2465                case IPPROTO_UDP:
2466                        tunnel |= I40E_TXD_CTX_UDP_TUNNELING;
2467                        *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2468                        break;
2469                case IPPROTO_GRE:
2470                        tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
2471                        *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2472                        break;
2473                case IPPROTO_IPIP:
2474                case IPPROTO_IPV6:
2475                        *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2476                        l4.hdr = skb_inner_network_header(skb);
2477                        break;
2478                default:
2479                        if (*tx_flags & I40E_TX_FLAGS_TSO)
2480                                return -1;
2481
2482                        skb_checksum_help(skb);
2483                        return 0;
2484                }
2485
2486                /* compute outer L3 header size */
2487                tunnel |= ((l4.hdr - ip.hdr) / 4) <<
2488                          I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
2489
2490                /* switch IP header pointer from outer to inner header */
2491                ip.hdr = skb_inner_network_header(skb);
2492
2493                /* compute tunnel header size */
2494                tunnel |= ((ip.hdr - l4.hdr) / 2) <<
2495                          I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2496
2497                /* indicate if we need to offload outer UDP header */
2498                if ((*tx_flags & I40E_TX_FLAGS_TSO) &&
2499                    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
2500                    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
2501                        tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
2502
2503                /* record tunnel offload values */
2504                *cd_tunneling |= tunnel;
2505
2506                /* switch L4 header pointer from outer to inner */
2507                l4.hdr = skb_inner_transport_header(skb);
2508                l4_proto = 0;
2509
2510                /* reset type as we transition from outer to inner headers */
2511                *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6);
2512                if (ip.v4->version == 4)
2513                        *tx_flags |= I40E_TX_FLAGS_IPV4;
2514                if (ip.v6->version == 6)
2515                        *tx_flags |= I40E_TX_FLAGS_IPV6;
2516        }
2517
2518        /* Enable IP checksum offloads */
2519        if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2520                l4_proto = ip.v4->protocol;
2521                /* the stack computes the IP header already, the only time we
2522                 * need the hardware to recompute it is in the case of TSO.
2523                 */
2524                cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2525                       I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
2526                       I40E_TX_DESC_CMD_IIPT_IPV4;
2527        } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2528                cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2529
2530                exthdr = ip.hdr + sizeof(*ip.v6);
2531                l4_proto = ip.v6->nexthdr;
2532                if (l4.hdr != exthdr)
2533                        ipv6_skip_exthdr(skb, exthdr - skb->data,
2534                                         &l4_proto, &frag_off);
2535        }
2536
2537        /* compute inner L3 header size */
2538        offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2539
2540        /* Enable L4 checksum offloads */
2541        switch (l4_proto) {
2542        case IPPROTO_TCP:
2543                /* enable checksum offloads */
2544                cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2545                offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2546                break;
2547        case IPPROTO_SCTP:
2548                /* enable SCTP checksum offload */
2549                cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2550                offset |= (sizeof(struct sctphdr) >> 2) <<
2551                          I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2552                break;
2553        case IPPROTO_UDP:
2554                /* enable UDP checksum offload */
2555                cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2556                offset |= (sizeof(struct udphdr) >> 2) <<
2557                          I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2558                break;
2559        default:
2560                if (*tx_flags & I40E_TX_FLAGS_TSO)
2561                        return -1;
2562                skb_checksum_help(skb);
2563                return 0;
2564        }
2565
2566        *td_cmd |= cmd;
2567        *td_offset |= offset;
2568
2569        return 1;
2570}
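
/* Worked example of the offset encoding above, assuming an untunneled
 * TCP/IPv4 frame with no options: MACLEN = 14 / 2 = 7 (in 2-byte words),
 * IPLEN = 20 / 4 = 5 and L4LEN = doff = 5 (both in 4-byte dwords), each
 * shifted into its field of td_offset, while td_cmd gets
 * I40E_TX_DESC_CMD_IIPT_IPV4 (no IP checksum recompute unless TSO is in
 * play) plus I40E_TX_DESC_CMD_L4T_EOFT_TCP.
 */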
2571
2572/**
2573 * i40e_create_tx_ctx - Build the Tx context descriptor
2574 * @tx_ring:  ring to create the descriptor on
2575 * @cd_type_cmd_tso_mss: Quad Word 1
2576 * @cd_tunneling: Quad Word 0 - bits 0-31
2577 * @cd_l2tag2: Quad Word 0 - bits 32-63
2578 **/
2579static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2580                               const u64 cd_type_cmd_tso_mss,
2581                               const u32 cd_tunneling, const u32 cd_l2tag2)
2582{
2583        struct i40e_tx_context_desc *context_desc;
2584        int i = tx_ring->next_to_use;
2585
2586        if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2587            !cd_tunneling && !cd_l2tag2)
2588                return;
2589
2590        /* grab the next descriptor */
2591        context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2592
2593        i++;
2594        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2595
2596        /* cpu_to_le32 and assign to struct fields */
2597        context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2598        context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2599        context_desc->rsvd = cpu_to_le16(0);
2600        context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2601}
2602
2603/**
2604 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2605 * @tx_ring: the ring to be checked
2606 * @size:    the size buffer we want to assure is available
2607 *
2608 * Returns -EBUSY if a stop is needed, else 0
2609 **/
2610int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2611{
2612        netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2613        /* Memory barrier before checking head and tail */
2614        smp_mb();
2615
2616        /* Check again in a case another CPU has just made room available. */
2617        if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2618                return -EBUSY;
2619
2620        /* A reprieve! - use start_queue because it doesn't call schedule */
2621        netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2622        ++tx_ring->tx_stats.restart_queue;
2623        return 0;
2624}
2625
2626/**
2627 * __i40e_chk_linearize - Check if there are more than 8 buffers per packet
2628 * @skb:      send buffer
2629 *
2630 * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
2631 * and so we need to figure out the cases where we need to linearize the skb.
2632 *
2633 * For TSO we need to count the TSO header and segment payload separately.
2634 * As such we need to check cases where we have 7 fragments or more as we
2635 * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
2636 * the segment payload in the first descriptor, and another 7 for the
2637 * fragments.
2638 **/
2639bool __i40e_chk_linearize(struct sk_buff *skb)
2640{
2641        const struct skb_frag_struct *frag, *stale;
2642        int nr_frags, sum;
2643
2644        /* no need to check if number of frags is less than 7 */
2645        nr_frags = skb_shinfo(skb)->nr_frags;
2646        if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
2647                return false;
2648
2649        /* We need to walk through the list and validate that each group
2650         * of 6 fragments totals at least gso_size.
2651         */
2652        nr_frags -= I40E_MAX_BUFFER_TXD - 2;
2653        frag = &skb_shinfo(skb)->frags[0];
2654
2655        /* Initialize size to the negative value of gso_size minus 1.  We
2656         * use this as the worst case scenario in which the frag ahead
2657         * of us only provides one byte which is why we are limited to 6
2658         * descriptors for a single transmit as the header and previous
2659         * fragment are already consuming 2 descriptors.
2660         */
2661        sum = 1 - skb_shinfo(skb)->gso_size;
2662
2663        /* Add size of frags 0 through 4 to create our initial sum */
2664        sum += skb_frag_size(frag++);
2665        sum += skb_frag_size(frag++);
2666        sum += skb_frag_size(frag++);
2667        sum += skb_frag_size(frag++);
2668        sum += skb_frag_size(frag++);
2669
2670        /* Walk through fragments adding latest fragment, testing it, and
2671         * then removing stale fragments from the sum.
2672         */
2673        stale = &skb_shinfo(skb)->frags[0];
2674        for (;;) {
2675                sum += skb_frag_size(frag++);
2676
2677                /* if sum is negative we failed to make sufficient progress */
2678                if (sum < 0)
2679                        return true;
2680
2681                if (!nr_frags--)
2682                        break;
2683
2684                sum -= skb_frag_size(stale++);
2685        }
2686
2687        return false;
2688}
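
/* Worked example, assuming gso_size = 2000 and nine 1500-byte frags: every
 * window of six consecutive frags sums to 9000, so each check computes
 * 1 - 2000 + 9000 = 7001 >= 0 and the skb is left alone.  With nine
 * 300-byte frags instead, the first window gives 1 - 2000 + 1800 = -199,
 * i.e. six frags cannot cover even one segment's worth of payload, so a
 * segment could need more than 8 descriptors and the caller must linearize
 * the skb first.
 */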
2689
2690/**
2691 * i40e_tx_map - Build the Tx descriptor
2692 * @tx_ring:  ring to send buffer on
2693 * @skb:      send buffer
2694 * @first:    first buffer info buffer to use
2695 * @tx_flags: collected send information
2696 * @hdr_len:  size of the packet header
2697 * @td_cmd:   the command field in the descriptor
2698 * @td_offset: offset for checksum or crc
2699 **/
2700#ifdef I40E_FCOE
2701inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2702                        struct i40e_tx_buffer *first, u32 tx_flags,
2703                        const u8 hdr_len, u32 td_cmd, u32 td_offset)
2704#else
2705static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2706                               struct i40e_tx_buffer *first, u32 tx_flags,
2707                               const u8 hdr_len, u32 td_cmd, u32 td_offset)
2708#endif
2709{
2710        unsigned int data_len = skb->data_len;
2711        unsigned int size = skb_headlen(skb);
2712        struct skb_frag_struct *frag;
2713        struct i40e_tx_buffer *tx_bi;
2714        struct i40e_tx_desc *tx_desc;
2715        u16 i = tx_ring->next_to_use;
2716        u32 td_tag = 0;
2717        dma_addr_t dma;
2718        u16 gso_segs;
2719        u16 desc_count = 0;
2720        bool tail_bump = true;
2721        bool do_rs = false;
2722
2723        if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2724                td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2725                td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2726                         I40E_TX_FLAGS_VLAN_SHIFT;
2727        }
2728
2729        if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2730                gso_segs = skb_shinfo(skb)->gso_segs;
2731        else
2732                gso_segs = 1;
2733
2734        /* multiply data chunks by size of headers */
2735        first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2736        first->gso_segs = gso_segs;
2737        first->skb = skb;
2738        first->tx_flags = tx_flags;
2739
2740        dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2741
2742        tx_desc = I40E_TX_DESC(tx_ring, i);
2743        tx_bi = first;
2744
2745        for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2746                unsigned int max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
2747
2748                if (dma_mapping_error(tx_ring->dev, dma))
2749                        goto dma_error;
2750
2751                /* record length, and DMA address */
2752                dma_unmap_len_set(tx_bi, len, size);
2753                dma_unmap_addr_set(tx_bi, dma, dma);
2754
2755                /* align size to end of page */
2756                max_data += -dma & (I40E_MAX_READ_REQ_SIZE - 1);
2757                tx_desc->buffer_addr = cpu_to_le64(dma);
2758
2759                while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2760                        tx_desc->cmd_type_offset_bsz =
2761                                build_ctob(td_cmd, td_offset,
2762                                           max_data, td_tag);
2763
2764                        tx_desc++;
2765                        i++;
2766                        desc_count++;
2767
2768                        if (i == tx_ring->count) {
2769                                tx_desc = I40E_TX_DESC(tx_ring, 0);
2770                                i = 0;
2771                        }
2772
2773                        dma += max_data;
2774                        size -= max_data;
2775
2776                        max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
2777                        tx_desc->buffer_addr = cpu_to_le64(dma);
2778                }
2779
2780                if (likely(!data_len))
2781                        break;
2782
2783                tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2784                                                          size, td_tag);
2785
2786                tx_desc++;
2787                i++;
2788                desc_count++;
2789
2790                if (i == tx_ring->count) {
2791                        tx_desc = I40E_TX_DESC(tx_ring, 0);
2792                        i = 0;
2793                }
2794
2795                size = skb_frag_size(frag);
2796                data_len -= size;
2797
2798                dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2799                                       DMA_TO_DEVICE);
2800
2801                tx_bi = &tx_ring->tx_bi[i];
2802        }
2803
2804        /* set next_to_watch value indicating a packet is present */
2805        first->next_to_watch = tx_desc;
2806
2807        i++;
2808        if (i == tx_ring->count)
2809                i = 0;
2810
2811        tx_ring->next_to_use = i;
2812
2813        netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
2814        i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2815
2816        /* Algorithm to optimize tail and RS bit setting:
2817         * if xmit_more is supported
2818         *      if xmit_more is true
2819         *              do not update tail and do not mark the RS bit.
2820         *      if xmit_more is false and the last xmit_more was false
2821         *              if every packet spanned fewer than 4 descriptors
2822         *                      then set the RS bit on every 4th packet and
2823         *                      update tail on every packet
2824         *              else
2825         *                      update tail and set the RS bit on every packet.
2826         *      if xmit_more is false and the last xmit_more was true
2827         *              update tail and set the RS bit.
2828         *
2829         * Optimization: issue the wmb only when the tail is updated.
2830         * The descriptor write-back path for the RS bit is optimized
2831         * with the same algorithm.
2832         *
2833         * Note: if there are fewer than 4 packets pending and
2834         * interrupts were disabled, the service task will trigger a
2835         * forced write-back.
2836         */
2837        if (skb->xmit_more &&
2838            !netif_xmit_stopped(txring_txq(tx_ring))) {
2839                tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2840                tail_bump = false;
2841        } else if (!skb->xmit_more &&
2842                   !netif_xmit_stopped(txring_txq(tx_ring)) &&
2843                   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
2844                   (tx_ring->packet_stride < WB_STRIDE) &&
2845                   (desc_count < WB_STRIDE)) {
2846                tx_ring->packet_stride++;
2847        } else {
2848                tx_ring->packet_stride = 0;
2849                tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2850                do_rs = true;
2851        }
2852        if (do_rs)
2853                tx_ring->packet_stride = 0;
2854
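            /* write the last descriptor of the packet with EOP, adding RS only
             * when a descriptor write-back is wanted for this packet
             */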
2855        tx_desc->cmd_type_offset_bsz =
2856                        build_ctob(td_cmd, td_offset, size, td_tag) |
2857                        cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
2858                                                  I40E_TX_DESC_CMD_EOP) <<
2859                                                  I40E_TXD_QW1_CMD_SHIFT);
2860
2861        /* notify HW of packet */
2862        if (!tail_bump) {
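                    /* no doorbell write here; warm the next descriptor's
                     * cache line for the upcoming transmit instead
                     */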
2863                prefetchw(tx_desc + 1);
2864        } else {
2865                /* Force memory writes to complete before letting h/w
2866                 * know there are new descriptors to fetch.  (Only
2867                 * applicable for weak-ordered memory model archs,
2868                 * such as IA-64).
2869                 */
2870                wmb();
2871                writel(i, tx_ring->tail);
2872        }
2873        return;
2874
2875dma_error:
2876        dev_info(tx_ring->dev, "TX DMA map failed\n");
2877
2878        /* clear dma mappings for failed tx_bi map */
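            /* walk backwards, with wrap-around, from the failing buffer to
             * the first buffer of this packet
             */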
2879        for (;;) {
2880                tx_bi = &tx_ring->tx_bi[i];
2881                i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2882                if (tx_bi == first)
2883                        break;
2884                if (i == 0)
2885                        i = tx_ring->count;
2886                i--;
2887        }
2888
2889        tx_ring->next_to_use = i;
2890}
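
    /* Illustrative sketch (not part of the driver): how i40e_tx_map() above
     * splits one DMA mapping into descriptor-sized chunks.  The limits below
     * are assumptions for the example (a 16K - 1 byte per-descriptor cap and
     * a 4K max read request size); the driver takes the real values from
     * i40e_txrx.h.  The first chunk is stretched so that it ends on a
     * read-request boundary, which lets every later chunk start aligned.
     */
    #if 0 /* example only, never compiled */
    static void sketch_split_mapping(unsigned long long dma, unsigned int size)
    {
            const unsigned int max_per_txd = 16 * 1024 - 1; /* assumed cap */
            const unsigned int read_req = 4096;             /* assumed size */
            const unsigned int aligned = max_per_txd & ~(read_req - 1);
            unsigned int max_data = aligned;

            /* stretch the first chunk so it ends on a read-request boundary */
            max_data += -dma & (read_req - 1);

            while (size > max_per_txd) {
                    /* one data descriptor would cover [dma, dma + max_data) */
                    dma += max_data;
                    size -= max_data;
                    max_data = aligned; /* later chunks start aligned */
            }
            /* the remaining size bytes go in the final data descriptor */
    }
    #endif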
2891
2892/**
2893 * i40e_xmit_frame_ring - Sends buffer on Tx ring
2894 * @skb:     send buffer
2895 * @tx_ring: ring to send buffer on
2896 *
2897 * Returns NETDEV_TX_OK if sent, else an error code
2898 **/
2899static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2900                                        struct i40e_ring *tx_ring)
2901{
2902        u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2903        u32 cd_tunneling = 0, cd_l2tag2 = 0;
2904        struct i40e_tx_buffer *first;
2905        u32 td_offset = 0;
2906        u32 tx_flags = 0;
2907        __be16 protocol;
2908        u32 td_cmd = 0;
2909        u8 hdr_len = 0;
2910        int tso, count;
2911        int tsyn;
2912
2913        /* prefetch the data; we'll need it later */
2914        prefetch(skb->data);
2915
2916        count = i40e_xmit_descriptor_count(skb);
2917        if (i40e_chk_linearize(skb, count)) {
2918                if (__skb_linearize(skb))
2919                        goto out_drop;
2920                count = i40e_txd_use_count(skb->len);
2921                tx_ring->tx_stats.tx_linearize++;
2922        }
2923
2924        /* need: 1 descriptor per I40E_MAX_DATA_PER_TXD chunk of each page
2925         *       fragment and of the linear skb head,
2926         *       + 4 desc gap to avoid the cache line where head is,
2927         *       + 1 desc for the context descriptor;
2928         * otherwise try again next time
2929         */
2930        if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2931                tx_ring->tx_stats.tx_busy++;
2932                return NETDEV_TX_BUSY;
2933        }
2934
2935        /* prepare the xmit flags */
2936        if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2937                goto out_drop;
2938
2939        /* obtain protocol of skb */
2940        protocol = vlan_get_protocol(skb);
2941
2942        /* record the location of the first descriptor for this packet */
2943        first = &tx_ring->tx_bi[tx_ring->next_to_use];
2944
2945        /* setup IPv4/IPv6 offloads */
2946        if (protocol == htons(ETH_P_IP))
2947                tx_flags |= I40E_TX_FLAGS_IPV4;
2948        else if (protocol == htons(ETH_P_IPV6))
2949                tx_flags |= I40E_TX_FLAGS_IPV6;
2950
2951        tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss);
2952
2953        if (tso < 0)
2954                goto out_drop;
2955        else if (tso)
2956                tx_flags |= I40E_TX_FLAGS_TSO;
2957
2958        /* Always offload the checksum, since it's in the data descriptor */
2959        tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2960                                  tx_ring, &cd_tunneling);
2961        if (tso < 0)
2962                goto out_drop;
2963
2964        tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2965
2966        if (tsyn)
2967                tx_flags |= I40E_TX_FLAGS_TSYN;
2968
2969        skb_tx_timestamp(skb);
2970
2971        /* always enable CRC insertion offload */
2972        td_cmd |= I40E_TX_DESC_CMD_ICRC;
2973
2974        i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2975                           cd_tunneling, cd_l2tag2);
2976
2977        /* Add Flow Director ATR if it's enabled.
2978         *
2979         * NOTE: this must always be directly before the data descriptor.
2980         */
2981        i40e_atr(tx_ring, skb, tx_flags);
2982
2983        i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2984                    td_cmd, td_offset);
2985
2986        return NETDEV_TX_OK;
2987
2988out_drop:
2989        dev_kfree_skb_any(skb);
2990        return NETDEV_TX_OK;
2991}
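
    /* Illustrative sketch (not part of the driver): the descriptor budget
     * that i40e_xmit_frame_ring() above reserves before starting a transmit.
     * The helper and the per_desc value below are assumptions for the
     * example; the driver computes the real count with
     * i40e_xmit_descriptor_count() and i40e_txd_use_count().  The "+ 4 + 1"
     * mirrors the check above: a 4 descriptor gap to stay off the cache line
     * holding the ring head, plus 1 for the context descriptor.
     */
    #if 0 /* example only, never compiled */
    static unsigned int sketch_tx_ring_space(unsigned int head_len,
                                             const unsigned int *frag_len,
                                             unsigned int nr_frags)
    {
            /* assumed per-descriptor data limit, rounded down to the read
             * request size as in i40e_tx_map()
             */
            const unsigned int per_desc = 12 * 1024;
            unsigned int count, i;

            /* one data descriptor per per_desc chunk of the linear head ... */
            count = (head_len + per_desc - 1) / per_desc;
            /* ... and per per_desc chunk of every page fragment */
            for (i = 0; i < nr_frags; i++)
                    count += (frag_len[i] + per_desc - 1) / per_desc;

            /* free ring entries required before the transmit may proceed */
            return count + 4 + 1;
    }
    #endif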
2992
2993/**
2994 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2995 * @skb:    send buffer
2996 * @netdev: network interface device structure
2997 *
2998 * Returns NETDEV_TX_OK if sent, else an error code
2999 **/
3000netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
3001{
3002        struct i40e_netdev_priv *np = netdev_priv(netdev);
3003        struct i40e_vsi *vsi = np->vsi;
3004        struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
3005
3006        /* hardware can't handle really short frames, so pad in software;
3007         * hardware padding takes over beyond this minimum length
3008         */
3009        if (skb_put_padto(skb, I40E_MIN_TX_LEN))
3010                return NETDEV_TX_OK;
3011
3012        return i40e_xmit_frame_ring(skb, tx_ring);
3013}
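
    /* Illustrative sketch (not part of this file): i40e_lan_xmit_frame() is
     * the driver's transmit entry point, so elsewhere in the driver
     * (i40e_main.c in the real code) it is plugged into the netdev
     * operations roughly as shown.  The structure name below is made up for
     * the example.
     */
    #if 0 /* example only, never compiled */
    static const struct net_device_ops sketch_netdev_ops = {
            /* the network stack calls this hook for each frame it queues on
             * the netdev
             */
            .ndo_start_xmit         = i40e_lan_xmit_frame,
    };
    #endif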
3014