linux/drivers/net/ethernet/cisco/enic/enic_main.c
   1/*
   2 * Copyright 2008-2010 Cisco Systems, Inc.  All rights reserved.
   3 * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
   4 *
   5 * This program is free software; you may redistribute it and/or modify
   6 * it under the terms of the GNU General Public License as published by
   7 * the Free Software Foundation; version 2 of the License.
   8 *
   9 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  10 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  11 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  12 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  13 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  14 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  15 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  16 * SOFTWARE.
  17 *
  18 */
  19
  20#include <linux/module.h>
  21#include <linux/kernel.h>
  22#include <linux/string.h>
  23#include <linux/errno.h>
  24#include <linux/types.h>
  25#include <linux/init.h>
  26#include <linux/interrupt.h>
  27#include <linux/workqueue.h>
  28#include <linux/pci.h>
  29#include <linux/netdevice.h>
  30#include <linux/etherdevice.h>
  31#include <linux/if.h>
  32#include <linux/if_ether.h>
  33#include <linux/if_vlan.h>
  34#include <linux/in.h>
  35#include <linux/ip.h>
  36#include <linux/ipv6.h>
  37#include <linux/tcp.h>
  38#include <linux/rtnetlink.h>
  39#include <linux/prefetch.h>
  40#include <net/ip6_checksum.h>
  41#include <linux/ktime.h>
  42#include <linux/numa.h>
  43#ifdef CONFIG_RFS_ACCEL
  44#include <linux/cpu_rmap.h>
  45#endif
  46#include <linux/crash_dump.h>
  47#include <net/busy_poll.h>
  48#include <net/vxlan.h>
  49
  50#include "cq_enet_desc.h"
  51#include "vnic_dev.h"
  52#include "vnic_intr.h"
  53#include "vnic_stats.h"
  54#include "vnic_vic.h"
  55#include "enic_res.h"
  56#include "enic.h"
  57#include "enic_dev.h"
  58#include "enic_pp.h"
  59#include "enic_clsf.h"
  60
  61#define ENIC_NOTIFY_TIMER_PERIOD        (2 * HZ)
  62#define WQ_ENET_MAX_DESC_LEN            (1 << WQ_ENET_LEN_BITS)
  63#define MAX_TSO                         (1 << 16)
  64#define ENIC_DESC_MAX_SPLITS            (MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1)
  65
  66#define PCI_DEVICE_ID_CISCO_VIC_ENET         0x0043  /* ethernet vnic */
  67#define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN     0x0044  /* enet dynamic vnic */
  68#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF      0x0071  /* enet SRIOV VF */
  69
  70#define RX_COPYBREAK_DEFAULT            256
  71
  72/* Supported devices */
  73static const struct pci_device_id enic_id_table[] = {
  74        { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
  75        { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) },
  76        { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF) },
  77        { 0, }  /* end of table */
  78};
  79
  80MODULE_DESCRIPTION(DRV_DESCRIPTION);
  81MODULE_AUTHOR("Scott Feldman <scofeldm@cisco.com>");
  82MODULE_LICENSE("GPL");
  83MODULE_DEVICE_TABLE(pci, enic_id_table);
  84
  85#define ENIC_LARGE_PKT_THRESHOLD                1000
  86#define ENIC_MAX_COALESCE_TIMERS                10
  87/*  Interrupt moderation table, which will be used to decide the
  88 *  coalescing timer values
  89 *  {rx_rate in Mbps, mapping percentage of the range}
  90 */
  91static struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = {
  92        {4000,  0},
  93        {4400, 10},
  94        {5060, 20},
  95        {5230, 30},
  96        {5540, 40},
  97        {5820, 50},
  98        {6120, 60},
  99        {6435, 70},
 100        {6745, 80},
 101        {7000, 90},
 102        {0xFFFFFFFF, 100}
 103};
 104
 105/* This table helps the driver to pick different ranges for rx coalescing
 106 * timer depending on the link speed.
 107 */
 108static struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
 109        {0,  0}, /* 0  - 4  Gbps */
 110        {0,  3}, /* 4  - 10 Gbps */
 111        {3,  6}, /* 10 - 40 Gbps */
 112};
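    /* enic_calc_int_moderation() below ties the two tables together: the
     * measured rx rate indexes mod_table to get a percentage, which is then
     * applied to a [range_start, range_end] coalescing-timer window whose
     * bounds depend on the link speed (see mod_range) and on whether small
     * or large packets dominate the traffic.
     */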
 113
 114static void enic_init_affinity_hint(struct enic *enic)
 115{
 116        int numa_node = dev_to_node(&enic->pdev->dev);
 117        int i;
 118
 119        for (i = 0; i < enic->intr_count; i++) {
 120                if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) ||
 121                    (cpumask_available(enic->msix[i].affinity_mask) &&
 122                     !cpumask_empty(enic->msix[i].affinity_mask)))
 123                        continue;
 124                if (zalloc_cpumask_var(&enic->msix[i].affinity_mask,
 125                                       GFP_KERNEL))
 126                        cpumask_set_cpu(cpumask_local_spread(i, numa_node),
 127                                        enic->msix[i].affinity_mask);
 128        }
 129}
 130
 131static void enic_free_affinity_hint(struct enic *enic)
 132{
 133        int i;
 134
 135        for (i = 0; i < enic->intr_count; i++) {
 136                if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i))
 137                        continue;
 138                free_cpumask_var(enic->msix[i].affinity_mask);
 139        }
 140}
 141
 142static void enic_set_affinity_hint(struct enic *enic)
 143{
 144        int i;
 145        int err;
 146
 147        for (i = 0; i < enic->intr_count; i++) {
 148                if (enic_is_err_intr(enic, i)           ||
 149                    enic_is_notify_intr(enic, i)        ||
 150                    !cpumask_available(enic->msix[i].affinity_mask) ||
 151                    cpumask_empty(enic->msix[i].affinity_mask))
 152                        continue;
 153                err = irq_set_affinity_hint(enic->msix_entry[i].vector,
 154                                            enic->msix[i].affinity_mask);
 155                if (err)
 156                        netdev_warn(enic->netdev, "irq_set_affinity_hint failed, err %d\n",
 157                                    err);
 158        }
 159
 160        for (i = 0; i < enic->wq_count; i++) {
 161                int wq_intr = enic_msix_wq_intr(enic, i);
 162
 163                if (cpumask_available(enic->msix[wq_intr].affinity_mask) &&
 164                    !cpumask_empty(enic->msix[wq_intr].affinity_mask))
 165                        netif_set_xps_queue(enic->netdev,
 166                                            enic->msix[wq_intr].affinity_mask,
 167                                            i);
 168        }
 169}
 170
 171static void enic_unset_affinity_hint(struct enic *enic)
 172{
 173        int i;
 174
 175        for (i = 0; i < enic->intr_count; i++)
 176                irq_set_affinity_hint(enic->msix_entry[i].vector, NULL);
 177}
 178
 179static int enic_udp_tunnel_set_port(struct net_device *netdev,
 180                                    unsigned int table, unsigned int entry,
 181                                    struct udp_tunnel_info *ti)
 182{
 183        struct enic *enic = netdev_priv(netdev);
 184        int err;
 185
 186        spin_lock_bh(&enic->devcmd_lock);
 187
 188        err = vnic_dev_overlay_offload_cfg(enic->vdev,
 189                                           OVERLAY_CFG_VXLAN_PORT_UPDATE,
 190                                           ntohs(ti->port));
 191        if (err)
 192                goto error;
 193
 194        err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
 195                                            enic->vxlan.patch_level);
 196        if (err)
 197                goto error;
 198
 199        enic->vxlan.vxlan_udp_port_number = ntohs(ti->port);
 200error:
 201        spin_unlock_bh(&enic->devcmd_lock);
 202
 203        return err;
 204}
 205
 206static int enic_udp_tunnel_unset_port(struct net_device *netdev,
 207                                      unsigned int table, unsigned int entry,
 208                                      struct udp_tunnel_info *ti)
 209{
 210        struct enic *enic = netdev_priv(netdev);
 211        int err;
 212
 213        spin_lock_bh(&enic->devcmd_lock);
 214
 215        err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
 216                                            OVERLAY_OFFLOAD_DISABLE);
 217        if (err)
 218                goto unlock;
 219
 220        enic->vxlan.vxlan_udp_port_number = 0;
 221
 222unlock:
 223        spin_unlock_bh(&enic->devcmd_lock);
 224
 225        return err;
 226}
 227
 228static const struct udp_tunnel_nic_info enic_udp_tunnels = {
 229        .set_port       = enic_udp_tunnel_set_port,
 230        .unset_port     = enic_udp_tunnel_unset_port,
 231        .tables         = {
 232                { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
 233        },
 234}, enic_udp_tunnels_v4 = {
 235        .set_port       = enic_udp_tunnel_set_port,
 236        .unset_port     = enic_udp_tunnel_unset_port,
 237        .flags          = UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
 238        .tables         = {
 239                { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
 240        },
 241};
 242
 243static netdev_features_t enic_features_check(struct sk_buff *skb,
 244                                             struct net_device *dev,
 245                                             netdev_features_t features)
 246{
 247        const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
 248        struct enic *enic = netdev_priv(dev);
 249        struct udphdr *udph;
 250        u16 port = 0;
 251        u8 proto;
 252
 253        if (!skb->encapsulation)
 254                return features;
 255
 256        features = vxlan_features_check(skb, features);
 257
 258        switch (vlan_get_protocol(skb)) {
 259        case htons(ETH_P_IPV6):
 260                if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
 261                        goto out;
 262                proto = ipv6_hdr(skb)->nexthdr;
 263                break;
 264        case htons(ETH_P_IP):
 265                proto = ip_hdr(skb)->protocol;
 266                break;
 267        default:
 268                goto out;
 269        }
 270
 271        switch (eth->h_proto) {
 272        case ntohs(ETH_P_IPV6):
 273                if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
 274                        goto out;
 275                fallthrough;
 276        case ntohs(ETH_P_IP):
 277                break;
 278        default:
 279                goto out;
 280        }
 281
 282
 283        if (proto == IPPROTO_UDP) {
 284                udph = udp_hdr(skb);
 285                port = be16_to_cpu(udph->dest);
 286        }
 287
 288        /* HW supports offload of only one UDP port. Remove CSUM and GSO MASK
 289         * for other UDP port tunnels
 290         */
 291        if (port  != enic->vxlan.vxlan_udp_port_number)
 292                goto out;
 293
 294        return features;
 295
 296out:
 297        return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 298}
 299
 300int enic_is_dynamic(struct enic *enic)
 301{
 302        return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
 303}
 304
 305int enic_sriov_enabled(struct enic *enic)
 306{
 307        return (enic->priv_flags & ENIC_SRIOV_ENABLED) ? 1 : 0;
 308}
 309
 310static int enic_is_sriov_vf(struct enic *enic)
 311{
 312        return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
 313}
 314
 315int enic_is_valid_vf(struct enic *enic, int vf)
 316{
 317#ifdef CONFIG_PCI_IOV
 318        return vf >= 0 && vf < enic->num_vfs;
 319#else
 320        return 0;
 321#endif
 322}
 323
 324static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf)
 325{
 326        struct enic *enic = vnic_dev_priv(wq->vdev);
 327
 328        if (buf->sop)
 329                dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len,
 330                                 DMA_TO_DEVICE);
 331        else
 332                dma_unmap_page(&enic->pdev->dev, buf->dma_addr, buf->len,
 333                               DMA_TO_DEVICE);
 334
 335        if (buf->os_buf)
 336                dev_kfree_skb_any(buf->os_buf);
 337}
 338
 339static void enic_wq_free_buf(struct vnic_wq *wq,
 340        struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque)
 341{
 342        enic_free_wq_buf(wq, buf);
 343}
 344
 345static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
 346        u8 type, u16 q_number, u16 completed_index, void *opaque)
 347{
 348        struct enic *enic = vnic_dev_priv(vdev);
 349
 350        spin_lock(&enic->wq_lock[q_number]);
 351
 352        vnic_wq_service(&enic->wq[q_number], cq_desc,
 353                completed_index, enic_wq_free_buf,
 354                opaque);
 355
 356        if (netif_tx_queue_stopped(netdev_get_tx_queue(enic->netdev, q_number)) &&
 357            vnic_wq_desc_avail(&enic->wq[q_number]) >=
 358            (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS))
 359                netif_wake_subqueue(enic->netdev, q_number);
 360
 361        spin_unlock(&enic->wq_lock[q_number]);
 362
 363        return 0;
 364}
 365
 366static bool enic_log_q_error(struct enic *enic)
 367{
 368        unsigned int i;
 369        u32 error_status;
 370        bool err = false;
 371
 372        for (i = 0; i < enic->wq_count; i++) {
 373                error_status = vnic_wq_error_status(&enic->wq[i]);
 374                err |= error_status;
 375                if (error_status)
 376                        netdev_err(enic->netdev, "WQ[%d] error_status %d\n",
 377                                i, error_status);
 378        }
 379
 380        for (i = 0; i < enic->rq_count; i++) {
 381                error_status = vnic_rq_error_status(&enic->rq[i]);
 382                err |= error_status;
 383                if (error_status)
 384                        netdev_err(enic->netdev, "RQ[%d] error_status %d\n",
 385                                i, error_status);
 386        }
 387
 388        return err;
 389}
 390
 391static void enic_msglvl_check(struct enic *enic)
 392{
 393        u32 msg_enable = vnic_dev_msg_lvl(enic->vdev);
 394
 395        if (msg_enable != enic->msg_enable) {
 396                netdev_info(enic->netdev, "msg lvl changed from 0x%x to 0x%x\n",
 397                        enic->msg_enable, msg_enable);
 398                enic->msg_enable = msg_enable;
 399        }
 400}
 401
 402static void enic_mtu_check(struct enic *enic)
 403{
 404        u32 mtu = vnic_dev_mtu(enic->vdev);
 405        struct net_device *netdev = enic->netdev;
 406
 407        if (mtu && mtu != enic->port_mtu) {
 408                enic->port_mtu = mtu;
 409                if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) {
 410                        mtu = max_t(int, ENIC_MIN_MTU,
 411                                min_t(int, ENIC_MAX_MTU, mtu));
 412                        if (mtu != netdev->mtu)
 413                                schedule_work(&enic->change_mtu_work);
 414                } else {
 415                        if (mtu < netdev->mtu)
 416                                netdev_warn(netdev,
 417                                        "interface MTU (%d) set higher "
 418                                        "than switch port MTU (%d)\n",
 419                                        netdev->mtu, mtu);
 420                }
 421        }
 422}
 423
 424static void enic_link_check(struct enic *enic)
 425{
 426        int link_status = vnic_dev_link_status(enic->vdev);
 427        int carrier_ok = netif_carrier_ok(enic->netdev);
 428
 429        if (link_status && !carrier_ok) {
 430                netdev_info(enic->netdev, "Link UP\n");
 431                netif_carrier_on(enic->netdev);
 432        } else if (!link_status && carrier_ok) {
 433                netdev_info(enic->netdev, "Link DOWN\n");
 434                netif_carrier_off(enic->netdev);
 435        }
 436}
 437
 438static void enic_notify_check(struct enic *enic)
 439{
 440        enic_msglvl_check(enic);
 441        enic_mtu_check(enic);
 442        enic_link_check(enic);
 443}
 444
 445#define ENIC_TEST_INTR(pba, i) (pba & (1 << i))
 446
 447static irqreturn_t enic_isr_legacy(int irq, void *data)
 448{
 449        struct net_device *netdev = data;
 450        struct enic *enic = netdev_priv(netdev);
 451        unsigned int io_intr = enic_legacy_io_intr();
 452        unsigned int err_intr = enic_legacy_err_intr();
 453        unsigned int notify_intr = enic_legacy_notify_intr();
 454        u32 pba;
 455
 456        vnic_intr_mask(&enic->intr[io_intr]);
 457
 458        pba = vnic_intr_legacy_pba(enic->legacy_pba);
 459        if (!pba) {
 460                vnic_intr_unmask(&enic->intr[io_intr]);
 461                return IRQ_NONE;        /* not our interrupt */
 462        }
 463
 464        if (ENIC_TEST_INTR(pba, notify_intr)) {
 465                enic_notify_check(enic);
 466                vnic_intr_return_all_credits(&enic->intr[notify_intr]);
 467        }
 468
 469        if (ENIC_TEST_INTR(pba, err_intr)) {
 470                vnic_intr_return_all_credits(&enic->intr[err_intr]);
 471                enic_log_q_error(enic);
 472                /* schedule recovery from WQ/RQ error */
 473                schedule_work(&enic->reset);
 474                return IRQ_HANDLED;
 475        }
 476
 477        if (ENIC_TEST_INTR(pba, io_intr))
 478                napi_schedule_irqoff(&enic->napi[0]);
 479        else
 480                vnic_intr_unmask(&enic->intr[io_intr]);
 481
 482        return IRQ_HANDLED;
 483}
 484
 485static irqreturn_t enic_isr_msi(int irq, void *data)
 486{
 487        struct enic *enic = data;
 488
 489        /* With MSI, there is no sharing of interrupts, so this is
 490         * our interrupt and there is no need to ack it.  The device
 491         * is not providing per-vector masking, so the OS will not
 492         * write to PCI config space to mask/unmask the interrupt.
 493         * We're using mask_on_assertion for MSI, so the device
 494         * automatically masks the interrupt when the interrupt is
 495         * generated.  Later, when exiting polling, the interrupt
 496         * will be unmasked (see enic_poll).
 497         *
 498         * Also, the device uses the same PCIe Traffic Class (TC)
 499         * for Memory Write data and MSI, so there are no ordering
 500         * issues; the MSI will always arrive at the Root Complex
 501         * _after_ corresponding Memory Writes (i.e. descriptor
 502         * writes).
 503         */
 504
 505        napi_schedule_irqoff(&enic->napi[0]);
 506
 507        return IRQ_HANDLED;
 508}
 509
 510static irqreturn_t enic_isr_msix(int irq, void *data)
 511{
 512        struct napi_struct *napi = data;
 513
 514        napi_schedule_irqoff(napi);
 515
 516        return IRQ_HANDLED;
 517}
 518
 519static irqreturn_t enic_isr_msix_err(int irq, void *data)
 520{
 521        struct enic *enic = data;
 522        unsigned int intr = enic_msix_err_intr(enic);
 523
 524        vnic_intr_return_all_credits(&enic->intr[intr]);
 525
 526        if (enic_log_q_error(enic))
 527                /* schedule recovery from WQ/RQ error */
 528                schedule_work(&enic->reset);
 529
 530        return IRQ_HANDLED;
 531}
 532
 533static irqreturn_t enic_isr_msix_notify(int irq, void *data)
 534{
 535        struct enic *enic = data;
 536        unsigned int intr = enic_msix_notify_intr(enic);
 537
 538        enic_notify_check(enic);
 539        vnic_intr_return_all_credits(&enic->intr[intr]);
 540
 541        return IRQ_HANDLED;
 542}
 543
 544static int enic_queue_wq_skb_cont(struct enic *enic, struct vnic_wq *wq,
 545                                  struct sk_buff *skb, unsigned int len_left,
 546                                  int loopback)
 547{
 548        const skb_frag_t *frag;
 549        dma_addr_t dma_addr;
 550
 551        /* Queue additional data fragments */
 552        for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
 553                len_left -= skb_frag_size(frag);
 554                dma_addr = skb_frag_dma_map(&enic->pdev->dev, frag, 0,
 555                                            skb_frag_size(frag),
 556                                            DMA_TO_DEVICE);
 557                if (unlikely(enic_dma_map_check(enic, dma_addr)))
 558                        return -ENOMEM;
 559                enic_queue_wq_desc_cont(wq, skb, dma_addr, skb_frag_size(frag),
 560                                        (len_left == 0),        /* EOP? */
 561                                        loopback);
 562        }
 563
 564        return 0;
 565}
 566
 567static int enic_queue_wq_skb_vlan(struct enic *enic, struct vnic_wq *wq,
 568                                  struct sk_buff *skb, int vlan_tag_insert,
 569                                  unsigned int vlan_tag, int loopback)
 570{
 571        unsigned int head_len = skb_headlen(skb);
 572        unsigned int len_left = skb->len - head_len;
 573        int eop = (len_left == 0);
 574        dma_addr_t dma_addr;
 575        int err = 0;
 576
 577        dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
 578                                  DMA_TO_DEVICE);
 579        if (unlikely(enic_dma_map_check(enic, dma_addr)))
 580                return -ENOMEM;
 581
 582        /* Queue the main skb fragment. The fragments are no larger
 583         * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
 584         * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
 585         * per fragment is queued.
 586         */
 587        enic_queue_wq_desc(wq, skb, dma_addr, head_len, vlan_tag_insert,
 588                           vlan_tag, eop, loopback);
 589
 590        if (!eop)
 591                err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
 592
 593        return err;
 594}
 595
 596static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
 597                                     struct sk_buff *skb, int vlan_tag_insert,
 598                                     unsigned int vlan_tag, int loopback)
 599{
 600        unsigned int head_len = skb_headlen(skb);
 601        unsigned int len_left = skb->len - head_len;
 602        unsigned int hdr_len = skb_checksum_start_offset(skb);
 603        unsigned int csum_offset = hdr_len + skb->csum_offset;
 604        int eop = (len_left == 0);
 605        dma_addr_t dma_addr;
 606        int err = 0;
 607
 608        dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
 609                                  DMA_TO_DEVICE);
 610        if (unlikely(enic_dma_map_check(enic, dma_addr)))
 611                return -ENOMEM;
 612
 613        /* Queue the main skb fragment. The fragments are no larger
 614         * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
 615         * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
 616         * per fragment is queued.
 617         */
 618        enic_queue_wq_desc_csum_l4(wq, skb, dma_addr, head_len, csum_offset,
 619                                   hdr_len, vlan_tag_insert, vlan_tag, eop,
 620                                   loopback);
 621
 622        if (!eop)
 623                err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
 624
 625        return err;
 626}
 627
 628static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
 629{
 630        const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
 631
 632        switch (eth->h_proto) {
 633        case ntohs(ETH_P_IP):
 634                inner_ip_hdr(skb)->check = 0;
 635                inner_tcp_hdr(skb)->check =
 636                        ~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
 637                                           inner_ip_hdr(skb)->daddr, 0,
 638                                           IPPROTO_TCP, 0);
 639                break;
 640        case ntohs(ETH_P_IPV6):
 641                inner_tcp_hdr(skb)->check =
 642                        ~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
 643                                         &inner_ipv6_hdr(skb)->daddr, 0,
 644                                         IPPROTO_TCP, 0);
 645                break;
 646        default:
 647                WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
 648                break;
 649        }
 650}
 651
 652static void enic_preload_tcp_csum(struct sk_buff *skb)
 653{
 654        /* Preload TCP csum field with IP pseudo hdr calculated
 655         * with IP length set to zero.  HW will later add in length
 656         * to each TCP segment resulting from the TSO.
 657         */
 658
 659        if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
 660                ip_hdr(skb)->check = 0;
 661                tcp_hdr(skb)->check = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
 662                        ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
 663        } else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) {
 664                tcp_v6_gso_csum_prep(skb);
 665        }
 666}
 667
 668static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
 669                                 struct sk_buff *skb, unsigned int mss,
 670                                 int vlan_tag_insert, unsigned int vlan_tag,
 671                                 int loopback)
 672{
 673        unsigned int frag_len_left = skb_headlen(skb);
 674        unsigned int len_left = skb->len - frag_len_left;
 675        int eop = (len_left == 0);
 676        unsigned int offset = 0;
 677        unsigned int hdr_len;
 678        dma_addr_t dma_addr;
 679        unsigned int len;
 680        skb_frag_t *frag;
 681
 682        if (skb->encapsulation) {
 683                hdr_len = skb_inner_transport_header(skb) - skb->data;
 684                hdr_len += inner_tcp_hdrlen(skb);
 685                enic_preload_tcp_csum_encap(skb);
 686        } else {
 687                hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 688                enic_preload_tcp_csum(skb);
 689        }
 690
 691        /* Queue WQ_ENET_MAX_DESC_LEN length descriptors
 692         * for the main skb fragment
 693         */
 694        while (frag_len_left) {
 695                len = min(frag_len_left, (unsigned int)WQ_ENET_MAX_DESC_LEN);
 696                dma_addr = dma_map_single(&enic->pdev->dev,
 697                                          skb->data + offset, len,
 698                                          DMA_TO_DEVICE);
 699                if (unlikely(enic_dma_map_check(enic, dma_addr)))
 700                        return -ENOMEM;
 701                enic_queue_wq_desc_tso(wq, skb, dma_addr, len, mss, hdr_len,
 702                                       vlan_tag_insert, vlan_tag,
 703                                       eop && (len == frag_len_left), loopback);
 704                frag_len_left -= len;
 705                offset += len;
 706        }
 707
 708        if (eop)
 709                return 0;
 710
 711        /* Queue WQ_ENET_MAX_DESC_LEN length descriptors
 712         * for additional data fragments
 713         */
 714        for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
 715                len_left -= skb_frag_size(frag);
 716                frag_len_left = skb_frag_size(frag);
 717                offset = 0;
 718
 719                while (frag_len_left) {
 720                        len = min(frag_len_left,
 721                                (unsigned int)WQ_ENET_MAX_DESC_LEN);
 722                        dma_addr = skb_frag_dma_map(&enic->pdev->dev, frag,
 723                                                    offset, len,
 724                                                    DMA_TO_DEVICE);
 725                        if (unlikely(enic_dma_map_check(enic, dma_addr)))
 726                                return -ENOMEM;
 727                        enic_queue_wq_desc_cont(wq, skb, dma_addr, len,
 728                                                (len_left == 0) &&
 729                                                 (len == frag_len_left),/*EOP*/
 730                                                loopback);
 731                        frag_len_left -= len;
 732                        offset += len;
 733                }
 734        }
 735
 736        return 0;
 737}
 738
 739static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq,
 740                                          struct sk_buff *skb,
 741                                          int vlan_tag_insert,
 742                                          unsigned int vlan_tag, int loopback)
 743{
 744        unsigned int head_len = skb_headlen(skb);
 745        unsigned int len_left = skb->len - head_len;
 746        /* Hardware will overwrite the checksum fields, calculating from
 747         * scratch and ignoring the value placed by software.
 748         * Offload mode = 00
 749         * mss[2], mss[1], mss[0] bits are set
 750         */
 751        unsigned int mss_or_csum = 7;
 752        int eop = (len_left == 0);
 753        dma_addr_t dma_addr;
 754        int err = 0;
 755
 756        dma_addr = dma_map_single(&enic->pdev->dev, skb->data, head_len,
 757                                  DMA_TO_DEVICE);
 758        if (unlikely(enic_dma_map_check(enic, dma_addr)))
 759                return -ENOMEM;
 760
 761        enic_queue_wq_desc_ex(wq, skb, dma_addr, head_len, mss_or_csum, 0,
 762                              vlan_tag_insert, vlan_tag,
 763                              WQ_ENET_OFFLOAD_MODE_CSUM, eop, 1 /* SOP */, eop,
 764                              loopback);
 765        if (!eop)
 766                err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);
 767
 768        return err;
 769}
 770
 771static inline void enic_queue_wq_skb(struct enic *enic,
 772        struct vnic_wq *wq, struct sk_buff *skb)
 773{
 774        unsigned int mss = skb_shinfo(skb)->gso_size;
 775        unsigned int vlan_tag = 0;
 776        int vlan_tag_insert = 0;
 777        int loopback = 0;
 778        int err;
 779
 780        if (skb_vlan_tag_present(skb)) {
 781                /* VLAN tag from trunking driver */
 782                vlan_tag_insert = 1;
 783                vlan_tag = skb_vlan_tag_get(skb);
 784        } else if (enic->loop_enable) {
 785                vlan_tag = enic->loop_tag;
 786                loopback = 1;
 787        }
 788
 789        if (mss)
 790                err = enic_queue_wq_skb_tso(enic, wq, skb, mss,
 791                                            vlan_tag_insert, vlan_tag,
 792                                            loopback);
 793        else if (skb->encapsulation)
 794                err = enic_queue_wq_skb_encap(enic, wq, skb, vlan_tag_insert,
 795                                              vlan_tag, loopback);
 796        else if (skb->ip_summed == CHECKSUM_PARTIAL)
 797                err = enic_queue_wq_skb_csum_l4(enic, wq, skb, vlan_tag_insert,
 798                                                vlan_tag, loopback);
 799        else
 800                err = enic_queue_wq_skb_vlan(enic, wq, skb, vlan_tag_insert,
 801                                             vlan_tag, loopback);
 802        if (unlikely(err)) {
 803                struct vnic_wq_buf *buf;
 804
 805                buf = wq->to_use->prev;
 806                /* while not EOP of previous pkt && queue not empty.
 807                 * For all non EOP bufs, os_buf is NULL.
 808                 */
 809                while (!buf->os_buf && (buf->next != wq->to_clean)) {
 810                        enic_free_wq_buf(wq, buf);
 811                        wq->ring.desc_avail++;
 812                        buf = buf->prev;
 813                }
 814                wq->to_use = buf->next;
 815                dev_kfree_skb(skb);
 816        }
 817}
 818
 819/* netif_tx_lock held, process context with BHs disabled, or BH */
 820static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
 821        struct net_device *netdev)
 822{
 823        struct enic *enic = netdev_priv(netdev);
 824        struct vnic_wq *wq;
 825        unsigned int txq_map;
 826        struct netdev_queue *txq;
 827
 828        if (skb->len <= 0) {
 829                dev_kfree_skb_any(skb);
 830                return NETDEV_TX_OK;
 831        }
 832
 833        txq_map = skb_get_queue_mapping(skb) % enic->wq_count;
 834        wq = &enic->wq[txq_map];
 835        txq = netdev_get_tx_queue(netdev, txq_map);
 836
 837        /* Non-TSO sends must fit within ENIC_NON_TSO_MAX_DESC descs,
 838         * which is very likely.  In the off chance it's going to take
 839         * more than ENIC_NON_TSO_MAX_DESC, linearize the skb.
 840         */
 841
 842        if (skb_shinfo(skb)->gso_size == 0 &&
 843            skb_shinfo(skb)->nr_frags + 1 > ENIC_NON_TSO_MAX_DESC &&
 844            skb_linearize(skb)) {
 845                dev_kfree_skb_any(skb);
 846                return NETDEV_TX_OK;
 847        }
 848
 849        spin_lock(&enic->wq_lock[txq_map]);
 850
 851        if (vnic_wq_desc_avail(wq) <
 852            skb_shinfo(skb)->nr_frags + ENIC_DESC_MAX_SPLITS) {
 853                netif_tx_stop_queue(txq);
 854                /* This is a hard error, log it */
 855                netdev_err(netdev, "BUG! Tx ring full when queue awake!\n");
 856                spin_unlock(&enic->wq_lock[txq_map]);
 857                return NETDEV_TX_BUSY;
 858        }
 859
 860        enic_queue_wq_skb(enic, wq, skb);
 861
 862        if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)
 863                netif_tx_stop_queue(txq);
 864        skb_tx_timestamp(skb);
 865        if (!netdev_xmit_more() || netif_xmit_stopped(txq))
 866                vnic_wq_doorbell(wq);
 867
 868        spin_unlock(&enic->wq_lock[txq_map]);
 869
 870        return NETDEV_TX_OK;
 871}
 872
 873/* dev_base_lock rwlock held, nominally process context */
 874static void enic_get_stats(struct net_device *netdev,
 875                           struct rtnl_link_stats64 *net_stats)
 876{
 877        struct enic *enic = netdev_priv(netdev);
 878        struct vnic_stats *stats;
 879        int err;
 880
 881        err = enic_dev_stats_dump(enic, &stats);
 882        /* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump
 883         * For other failures, like devcmd failure, we return previously
 884         * recorded stats.
 885         */
 886        if (err == -ENOMEM)
 887                return;
 888
 889        net_stats->tx_packets = stats->tx.tx_frames_ok;
 890        net_stats->tx_bytes = stats->tx.tx_bytes_ok;
 891        net_stats->tx_errors = stats->tx.tx_errors;
 892        net_stats->tx_dropped = stats->tx.tx_drops;
 893
 894        net_stats->rx_packets = stats->rx.rx_frames_ok;
 895        net_stats->rx_bytes = stats->rx.rx_bytes_ok;
 896        net_stats->rx_errors = stats->rx.rx_errors;
 897        net_stats->multicast = stats->rx.rx_multicast_frames_ok;
 898        net_stats->rx_over_errors = enic->rq_truncated_pkts;
 899        net_stats->rx_crc_errors = enic->rq_bad_fcs;
 900        net_stats->rx_dropped = stats->rx.rx_no_bufs + stats->rx.rx_drop;
 901}
 902
 903static int enic_mc_sync(struct net_device *netdev, const u8 *mc_addr)
 904{
 905        struct enic *enic = netdev_priv(netdev);
 906
 907        if (enic->mc_count == ENIC_MULTICAST_PERFECT_FILTERS) {
 908                unsigned int mc_count = netdev_mc_count(netdev);
 909
 910                netdev_warn(netdev, "Registering only %d out of %d multicast addresses\n",
 911                            ENIC_MULTICAST_PERFECT_FILTERS, mc_count);
 912
 913                return -ENOSPC;
 914        }
 915
 916        enic_dev_add_addr(enic, mc_addr);
 917        enic->mc_count++;
 918
 919        return 0;
 920}
 921
 922static int enic_mc_unsync(struct net_device *netdev, const u8 *mc_addr)
 923{
 924        struct enic *enic = netdev_priv(netdev);
 925
 926        enic_dev_del_addr(enic, mc_addr);
 927        enic->mc_count--;
 928
 929        return 0;
 930}
 931
 932static int enic_uc_sync(struct net_device *netdev, const u8 *uc_addr)
 933{
 934        struct enic *enic = netdev_priv(netdev);
 935
 936        if (enic->uc_count == ENIC_UNICAST_PERFECT_FILTERS) {
 937                unsigned int uc_count = netdev_uc_count(netdev);
 938
 939                netdev_warn(netdev, "Registering only %d out of %d unicast addresses\n",
 940                            ENIC_UNICAST_PERFECT_FILTERS, uc_count);
 941
 942                return -ENOSPC;
 943        }
 944
 945        enic_dev_add_addr(enic, uc_addr);
 946        enic->uc_count++;
 947
 948        return 0;
 949}
 950
 951static int enic_uc_unsync(struct net_device *netdev, const u8 *uc_addr)
 952{
 953        struct enic *enic = netdev_priv(netdev);
 954
 955        enic_dev_del_addr(enic, uc_addr);
 956        enic->uc_count--;
 957
 958        return 0;
 959}
 960
 961void enic_reset_addr_lists(struct enic *enic)
 962{
 963        struct net_device *netdev = enic->netdev;
 964
 965        __dev_uc_unsync(netdev, NULL);
 966        __dev_mc_unsync(netdev, NULL);
 967
 968        enic->mc_count = 0;
 969        enic->uc_count = 0;
 970        enic->flags = 0;
 971}
 972
 973static int enic_set_mac_addr(struct net_device *netdev, char *addr)
 974{
 975        struct enic *enic = netdev_priv(netdev);
 976
 977        if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) {
 978                if (!is_valid_ether_addr(addr) && !is_zero_ether_addr(addr))
 979                        return -EADDRNOTAVAIL;
 980        } else {
 981                if (!is_valid_ether_addr(addr))
 982                        return -EADDRNOTAVAIL;
 983        }
 984
 985        memcpy(netdev->dev_addr, addr, netdev->addr_len);
 986
 987        return 0;
 988}
 989
 990static int enic_set_mac_address_dynamic(struct net_device *netdev, void *p)
 991{
 992        struct enic *enic = netdev_priv(netdev);
 993        struct sockaddr *saddr = p;
 994        char *addr = saddr->sa_data;
 995        int err;
 996
 997        if (netif_running(enic->netdev)) {
 998                err = enic_dev_del_station_addr(enic);
 999                if (err)
1000                        return err;
1001        }
1002
1003        err = enic_set_mac_addr(netdev, addr);
1004        if (err)
1005                return err;
1006
1007        if (netif_running(enic->netdev)) {
1008                err = enic_dev_add_station_addr(enic);
1009                if (err)
1010                        return err;
1011        }
1012
1013        return err;
1014}
1015
1016static int enic_set_mac_address(struct net_device *netdev, void *p)
1017{
1018        struct sockaddr *saddr = p;
1019        char *addr = saddr->sa_data;
1020        struct enic *enic = netdev_priv(netdev);
1021        int err;
1022
1023        err = enic_dev_del_station_addr(enic);
1024        if (err)
1025                return err;
1026
1027        err = enic_set_mac_addr(netdev, addr);
1028        if (err)
1029                return err;
1030
1031        return enic_dev_add_station_addr(enic);
1032}
1033
1034/* netif_tx_lock held, BHs disabled */
1035static void enic_set_rx_mode(struct net_device *netdev)
1036{
1037        struct enic *enic = netdev_priv(netdev);
1038        int directed = 1;
1039        int multicast = (netdev->flags & IFF_MULTICAST) ? 1 : 0;
1040        int broadcast = (netdev->flags & IFF_BROADCAST) ? 1 : 0;
1041        int promisc = (netdev->flags & IFF_PROMISC) ||
1042                netdev_uc_count(netdev) > ENIC_UNICAST_PERFECT_FILTERS;
1043        int allmulti = (netdev->flags & IFF_ALLMULTI) ||
1044                netdev_mc_count(netdev) > ENIC_MULTICAST_PERFECT_FILTERS;
1045        unsigned int flags = netdev->flags |
1046                (allmulti ? IFF_ALLMULTI : 0) |
1047                (promisc ? IFF_PROMISC : 0);
1048
1049        if (enic->flags != flags) {
1050                enic->flags = flags;
1051                enic_dev_packet_filter(enic, directed,
1052                        multicast, broadcast, promisc, allmulti);
1053        }
1054
1055        if (!promisc) {
1056                __dev_uc_sync(netdev, enic_uc_sync, enic_uc_unsync);
1057                if (!allmulti)
1058                        __dev_mc_sync(netdev, enic_mc_sync, enic_mc_unsync);
1059        }
1060}
1061
1062/* netif_tx_lock held, BHs disabled */
1063static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1064{
1065        struct enic *enic = netdev_priv(netdev);
1066        schedule_work(&enic->tx_hang_reset);
1067}
1068
1069static int enic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1070{
1071        struct enic *enic = netdev_priv(netdev);
1072        struct enic_port_profile *pp;
1073        int err;
1074
1075        ENIC_PP_BY_INDEX(enic, vf, pp, &err);
1076        if (err)
1077                return err;
1078
1079        if (is_valid_ether_addr(mac) || is_zero_ether_addr(mac)) {
1080                if (vf == PORT_SELF_VF) {
1081                        memcpy(pp->vf_mac, mac, ETH_ALEN);
1082                        return 0;
1083                } else {
1084                        /*
1085                         * For sriov vf's set the mac in hw
1086                         */
1087                        ENIC_DEVCMD_PROXY_BY_INDEX(vf, err, enic,
1088                                vnic_dev_set_mac_addr, mac);
1089                        return enic_dev_status_to_errno(err);
1090                }
1091        } else
1092                return -EINVAL;
1093}
1094
1095static int enic_set_vf_port(struct net_device *netdev, int vf,
1096        struct nlattr *port[])
1097{
1098        struct enic *enic = netdev_priv(netdev);
1099        struct enic_port_profile prev_pp;
1100        struct enic_port_profile *pp;
1101        int err = 0, restore_pp = 1;
1102
1103        ENIC_PP_BY_INDEX(enic, vf, pp, &err);
1104        if (err)
1105                return err;
1106
1107        if (!port[IFLA_PORT_REQUEST])
1108                return -EOPNOTSUPP;
1109
1110        memcpy(&prev_pp, pp, sizeof(*enic->pp));
1111        memset(pp, 0, sizeof(*enic->pp));
1112
1113        pp->set |= ENIC_SET_REQUEST;
1114        pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]);
1115
1116        if (port[IFLA_PORT_PROFILE]) {
1117                pp->set |= ENIC_SET_NAME;
1118                memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]),
1119                        PORT_PROFILE_MAX);
1120        }
1121
1122        if (port[IFLA_PORT_INSTANCE_UUID]) {
1123                pp->set |= ENIC_SET_INSTANCE;
1124                memcpy(pp->instance_uuid,
1125                        nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX);
1126        }
1127
1128        if (port[IFLA_PORT_HOST_UUID]) {
1129                pp->set |= ENIC_SET_HOST;
1130                memcpy(pp->host_uuid,
1131                        nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX);
1132        }
1133
1134        if (vf == PORT_SELF_VF) {
1135                /* Special case handling: mac came from IFLA_VF_MAC */
1136                if (!is_zero_ether_addr(prev_pp.vf_mac))
1137                        memcpy(pp->mac_addr, prev_pp.vf_mac, ETH_ALEN);
1138
1139                if (is_zero_ether_addr(netdev->dev_addr))
1140                        eth_hw_addr_random(netdev);
1141        } else {
1142                /* SR-IOV VF: get mac from adapter */
1143                ENIC_DEVCMD_PROXY_BY_INDEX(vf, err, enic,
1144                        vnic_dev_get_mac_addr, pp->mac_addr);
1145                if (err) {
1146                        netdev_err(netdev, "Error getting mac for vf %d\n", vf);
1147                        memcpy(pp, &prev_pp, sizeof(*pp));
1148                        return enic_dev_status_to_errno(err);
1149                }
1150        }
1151
1152        err = enic_process_set_pp_request(enic, vf, &prev_pp, &restore_pp);
1153        if (err) {
1154                if (restore_pp) {
1155                        /* Things are still the way they were: Implicit
1156                         * DISASSOCIATE failed
1157                         */
1158                        memcpy(pp, &prev_pp, sizeof(*pp));
1159                } else {
1160                        memset(pp, 0, sizeof(*pp));
1161                        if (vf == PORT_SELF_VF)
1162                                eth_zero_addr(netdev->dev_addr);
1163                }
1164        } else {
1165                /* Set flag to indicate that the port assoc/disassoc
1166                 * request has been sent out to fw
1167                 */
1168                pp->set |= ENIC_PORT_REQUEST_APPLIED;
1169
1170                /* If DISASSOCIATE, clean up all assigned/saved macaddresses */
1171                if (pp->request == PORT_REQUEST_DISASSOCIATE) {
1172                        eth_zero_addr(pp->mac_addr);
1173                        if (vf == PORT_SELF_VF)
1174                                eth_zero_addr(netdev->dev_addr);
1175                }
1176        }
1177
1178        if (vf == PORT_SELF_VF)
1179                eth_zero_addr(pp->vf_mac);
1180
1181        return err;
1182}
1183
1184static int enic_get_vf_port(struct net_device *netdev, int vf,
1185        struct sk_buff *skb)
1186{
1187        struct enic *enic = netdev_priv(netdev);
1188        u16 response = PORT_PROFILE_RESPONSE_SUCCESS;
1189        struct enic_port_profile *pp;
1190        int err;
1191
1192        ENIC_PP_BY_INDEX(enic, vf, pp, &err);
1193        if (err)
1194                return err;
1195
1196        if (!(pp->set & ENIC_PORT_REQUEST_APPLIED))
1197                return -ENODATA;
1198
1199        err = enic_process_get_pp_request(enic, vf, pp->request, &response);
1200        if (err)
1201                return err;
1202
1203        if (nla_put_u16(skb, IFLA_PORT_REQUEST, pp->request) ||
1204            nla_put_u16(skb, IFLA_PORT_RESPONSE, response) ||
1205            ((pp->set & ENIC_SET_NAME) &&
1206             nla_put(skb, IFLA_PORT_PROFILE, PORT_PROFILE_MAX, pp->name)) ||
1207            ((pp->set & ENIC_SET_INSTANCE) &&
1208             nla_put(skb, IFLA_PORT_INSTANCE_UUID, PORT_UUID_MAX,
1209                     pp->instance_uuid)) ||
1210            ((pp->set & ENIC_SET_HOST) &&
1211             nla_put(skb, IFLA_PORT_HOST_UUID, PORT_UUID_MAX, pp->host_uuid)))
1212                goto nla_put_failure;
1213        return 0;
1214
1215nla_put_failure:
1216        return -EMSGSIZE;
1217}
1218
1219static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
1220{
1221        struct enic *enic = vnic_dev_priv(rq->vdev);
1222
1223        if (!buf->os_buf)
1224                return;
1225
1226        dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len,
1227                         DMA_FROM_DEVICE);
1228        dev_kfree_skb_any(buf->os_buf);
1229        buf->os_buf = NULL;
1230}
1231
1232static int enic_rq_alloc_buf(struct vnic_rq *rq)
1233{
1234        struct enic *enic = vnic_dev_priv(rq->vdev);
1235        struct net_device *netdev = enic->netdev;
1236        struct sk_buff *skb;
1237        unsigned int len = netdev->mtu + VLAN_ETH_HLEN;
1238        unsigned int os_buf_index = 0;
1239        dma_addr_t dma_addr;
1240        struct vnic_rq_buf *buf = rq->to_use;
1241
1242        if (buf->os_buf) {
1243                enic_queue_rq_desc(rq, buf->os_buf, os_buf_index, buf->dma_addr,
1244                                   buf->len);
1245
1246                return 0;
1247        }
1248        skb = netdev_alloc_skb_ip_align(netdev, len);
1249        if (!skb)
1250                return -ENOMEM;
1251
1252        dma_addr = dma_map_single(&enic->pdev->dev, skb->data, len,
1253                                  DMA_FROM_DEVICE);
1254        if (unlikely(enic_dma_map_check(enic, dma_addr))) {
1255                dev_kfree_skb(skb);
1256                return -ENOMEM;
1257        }
1258
1259        enic_queue_rq_desc(rq, skb, os_buf_index,
1260                dma_addr, len);
1261
1262        return 0;
1263}
1264
1265static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size,
1266                                      u32 pkt_len)
1267{
1268        if (ENIC_LARGE_PKT_THRESHOLD <= pkt_len)
1269                pkt_size->large_pkt_bytes_cnt += pkt_len;
1270        else
1271                pkt_size->small_pkt_bytes_cnt += pkt_len;
1272}
1273
1274static bool enic_rxcopybreak(struct net_device *netdev, struct sk_buff **skb,
1275                             struct vnic_rq_buf *buf, u16 len)
1276{
1277        struct enic *enic = netdev_priv(netdev);
1278        struct sk_buff *new_skb;
1279
1280        if (len > enic->rx_copybreak)
1281                return false;
1282        new_skb = netdev_alloc_skb_ip_align(netdev, len);
1283        if (!new_skb)
1284                return false;
1285        dma_sync_single_for_cpu(&enic->pdev->dev, buf->dma_addr, len,
1286                                DMA_FROM_DEVICE);
1287        memcpy(new_skb->data, (*skb)->data, len);
1288        *skb = new_skb;
1289
1290        return true;
1291}
1292
1293static void enic_rq_indicate_buf(struct vnic_rq *rq,
1294        struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
1295        int skipped, void *opaque)
1296{
1297        struct enic *enic = vnic_dev_priv(rq->vdev);
1298        struct net_device *netdev = enic->netdev;
1299        struct sk_buff *skb;
1300        struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
1301
1302        u8 type, color, eop, sop, ingress_port, vlan_stripped;
1303        u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
1304        u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok;
1305        u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc;
1306        u8 packet_error;
1307        u16 q_number, completed_index, bytes_written, vlan_tci, checksum;
1308        u32 rss_hash;
1309        bool outer_csum_ok = true, encap = false;
1310
1311        if (skipped)
1312                return;
1313
1314        skb = buf->os_buf;
1315
1316        cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc,
1317                &type, &color, &q_number, &completed_index,
1318                &ingress_port, &fcoe, &eop, &sop, &rss_type,
1319                &csum_not_calc, &rss_hash, &bytes_written,
1320                &packet_error, &vlan_stripped, &vlan_tci, &checksum,
1321                &fcoe_sof, &fcoe_fc_crc_ok, &fcoe_enc_error,
1322                &fcoe_eof, &tcp_udp_csum_ok, &udp, &tcp,
1323                &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment,
1324                &fcs_ok);
1325
1326        if (packet_error) {
1327
1328                if (!fcs_ok) {
1329                        if (bytes_written > 0)
1330                                enic->rq_bad_fcs++;
1331                        else if (bytes_written == 0)
1332                                enic->rq_truncated_pkts++;
1333                }
1334
1335                dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len,
1336                                 DMA_FROM_DEVICE);
1337                dev_kfree_skb_any(skb);
1338                buf->os_buf = NULL;
1339
1340                return;
1341        }
1342
1343        if (eop && bytes_written > 0) {
1344
1345                /* Good receive
1346                 */
1347
1348                if (!enic_rxcopybreak(netdev, &skb, buf, bytes_written)) {
1349                        buf->os_buf = NULL;
1350                        dma_unmap_single(&enic->pdev->dev, buf->dma_addr,
1351                                         buf->len, DMA_FROM_DEVICE);
1352                }
1353                prefetch(skb->data - NET_IP_ALIGN);
1354
1355                skb_put(skb, bytes_written);
1356                skb->protocol = eth_type_trans(skb, netdev);
1357                skb_record_rx_queue(skb, q_number);
1358                if ((netdev->features & NETIF_F_RXHASH) && rss_hash &&
1359                    (type == 3)) {
1360                        switch (rss_type) {
1361                        case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4:
1362                        case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6:
1363                        case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX:
1364                                skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4);
1365                                break;
1366                        case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4:
1367                        case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6:
1368                        case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX:
1369                                skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3);
1370                                break;
1371                        }
1372                }
1373                if (enic->vxlan.vxlan_udp_port_number) {
1374                        switch (enic->vxlan.patch_level) {
1375                        case 0:
1376                                if (fcoe) {
1377                                        encap = true;
1378                                        outer_csum_ok = fcoe_fc_crc_ok;
1379                                }
1380                                break;
1381                        case 2:
1382                                if ((type == 7) &&
1383                                    (rss_hash & BIT(0))) {
1384                                        encap = true;
1385                                        outer_csum_ok = (rss_hash & BIT(1)) &&
1386                                                        (rss_hash & BIT(2));
1387                                }
1388                                break;
1389                        }
1390                }
1391
1392                /* Hardware does not provide the whole packet checksum. It
1393                 * only provides a pseudo checksum. Since hw validates the
1394                 * packet checksum but does not give us the checksum value,
1395                 * use CHECKSUM_UNNECESSARY.
1396                 *
1397                 * In case of an encap pkt, tcp_udp_csum_ok is the inner
1398                 * csum_ok. outer_csum_ok is set by hw when the outer udp
1399                 * csum is correct or is zero.
1400                 */
1401                if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc &&
1402                    tcp_udp_csum_ok && outer_csum_ok &&
1403                    (ipv4_csum_ok || ipv6)) {
1404                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1405                        skb->csum_level = encap;
1406                }
1407
1408                if (vlan_stripped)
1409                        __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
1410
1411                skb_mark_napi_id(skb, &enic->napi[rq->index]);
1412                if (!(netdev->features & NETIF_F_GRO))
1413                        netif_receive_skb(skb);
1414                else
1415                        napi_gro_receive(&enic->napi[q_number], skb);
1416                if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
1417                        enic_intr_update_pkt_size(&cq->pkt_size_counter,
1418                                                  bytes_written);
1419        } else {
1420
1421                /* Buffer overflow
1422                 */
1423
1424                dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len,
1425                                 DMA_FROM_DEVICE);
1426                dev_kfree_skb_any(skb);
1427                buf->os_buf = NULL;
1428        }
1429}
1430
1431static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
1432        u8 type, u16 q_number, u16 completed_index, void *opaque)
1433{
1434        struct enic *enic = vnic_dev_priv(vdev);
1435
1436        vnic_rq_service(&enic->rq[q_number], cq_desc,
1437                completed_index, VNIC_RQ_RETURN_DESC,
1438                enic_rq_indicate_buf, opaque);
1439
1440        return 0;
1441}
1442
1443static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
1444{
1445        unsigned int intr = enic_msix_rq_intr(enic, rq->index);
1446        struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
1447        u32 timer = cq->tobe_rx_coal_timeval;
1448
1449        if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
1450                vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
1451                cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
1452        }
1453}
1454
1455static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
1456{
1457        struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
1458        struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
1459        struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
1460        int index;
1461        u32 timer;
1462        u32 range_start;
1463        u32 traffic;
1464        u64 delta;
1465        ktime_t now = ktime_get();
1466
1467        delta = ktime_us_delta(now, cq->prev_ts);
1468        if (delta < ENIC_AIC_TS_BREAK)
1469                return;
1470        cq->prev_ts = now;
1471
1472        traffic = pkt_size_counter->large_pkt_bytes_cnt +
1473                  pkt_size_counter->small_pkt_bytes_cnt;
1474        /* The table takes Mbps; delta is in usecs.
1475         * traffic *= 8    => bits
1476         * traffic *= (10^6 / delta)    => bps
1477         * traffic /= 10^6     => Mbps
1478         *
1479         * Combining, traffic *= (8 / delta)
1480         */
1481
1482        traffic <<= 3;
1483        traffic = delta > UINT_MAX ? 0 : traffic / (u32)delta;
1484
1485        for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
1486                if (traffic < mod_table[index].rx_rate)
1487                        break;
1488        range_start = (pkt_size_counter->small_pkt_bytes_cnt >
1489                       pkt_size_counter->large_pkt_bytes_cnt << 1) ?
1490                      rx_coal->small_pkt_range_start :
1491                      rx_coal->large_pkt_range_start;
1492        timer = range_start + ((rx_coal->range_end - range_start) *
1493                               mod_table[index].range_percent / 100);
1494        /* Damping */
1495        cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;
1496
1497        pkt_size_counter->large_pkt_bytes_cnt = 0;
1498        pkt_size_counter->small_pkt_bytes_cnt = 0;
1499}
1500
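/* NAPI poll used in INTx and MSI mode, where a single interrupt covers both
 * WQ and RQ: service TX and RX completions, return the combined credits to
 * the interrupt block, refill the receive ring, and unmask the interrupt
 * only when the RX budget was not exhausted.
 */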
1501static int enic_poll(struct napi_struct *napi, int budget)
1502{
1503        struct net_device *netdev = napi->dev;
1504        struct enic *enic = netdev_priv(netdev);
1505        unsigned int cq_rq = enic_cq_rq(enic, 0);
1506        unsigned int cq_wq = enic_cq_wq(enic, 0);
1507        unsigned int intr = enic_legacy_io_intr();
1508        unsigned int rq_work_to_do = budget;
1509        unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET;
1510        unsigned int  work_done, rq_work_done = 0, wq_work_done;
1511        int err;
1512
1513        wq_work_done = vnic_cq_service(&enic->cq[cq_wq], wq_work_to_do,
1514                                       enic_wq_service, NULL);
1515
1516        if (budget > 0)
1517                rq_work_done = vnic_cq_service(&enic->cq[cq_rq],
1518                        rq_work_to_do, enic_rq_service, NULL);
1519
1520        /* Accumulate intr event credits for this polling
1521         * cycle.  An intr event is the completion of a
1522         * a WQ or RQ packet.
1523         */
1524
1525        work_done = rq_work_done + wq_work_done;
1526
1527        if (work_done > 0)
1528                vnic_intr_return_credits(&enic->intr[intr],
1529                        work_done,
1530                        0 /* don't unmask intr */,
1531                        0 /* don't reset intr timer */);
1532
1533        err = vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf);
1534
1535        /* Buffer allocation failed. Stay in polling
1536         * mode so we can try to fill the ring again.
1537         */
1538
1539        if (err)
1540                rq_work_done = rq_work_to_do;
1541        if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
1542                /* Call the function which refreshes the intr coalescing timer
1543                 * value based on the traffic.
1544                 */
1545                enic_calc_int_moderation(enic, &enic->rq[0]);
1546
1547        if ((rq_work_done < budget) && napi_complete_done(napi, rq_work_done)) {
1548
1549                /* Some work done, but not enough to stay in polling,
1550                 * exit polling
1551                 */
1552
1553                if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
1554                        enic_set_int_moderation(enic, &enic->rq[0]);
1555                vnic_intr_unmask(&enic->intr[intr]);
1556        }
1557
1558        return rq_work_done;
1559}
1560
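/* Accelerated RFS needs a reverse map from RX interrupt vectors to CPUs;
 * it is only built when the device is in MSI-X mode, with one entry per
 * receive queue vector.
 */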
1561#ifdef CONFIG_RFS_ACCEL
1562static void enic_free_rx_cpu_rmap(struct enic *enic)
1563{
1564        free_irq_cpu_rmap(enic->netdev->rx_cpu_rmap);
1565        enic->netdev->rx_cpu_rmap = NULL;
1566}
1567
1568static void enic_set_rx_cpu_rmap(struct enic *enic)
1569{
1570        int i, res;
1571
1572        if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) {
1573                enic->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(enic->rq_count);
1574                if (unlikely(!enic->netdev->rx_cpu_rmap))
1575                        return;
1576                for (i = 0; i < enic->rq_count; i++) {
1577                        res = irq_cpu_rmap_add(enic->netdev->rx_cpu_rmap,
1578                                               enic->msix_entry[i].vector);
1579                        if (unlikely(res)) {
1580                                enic_free_rx_cpu_rmap(enic);
1581                                return;
1582                        }
1583                }
1584        }
1585}
1586
1587#else
1588
1589static void enic_free_rx_cpu_rmap(struct enic *enic)
1590{
1591}
1592
1593static void enic_set_rx_cpu_rmap(struct enic *enic)
1594{
1595}
1596
1597#endif /* CONFIG_RFS_ACCEL */
1598
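/* Per-WQ NAPI poll for MSI-X mode: service TX completions only.  When no
 * work was found the NAPI context is completed and the interrupt is
 * unmasked; otherwise the full budget is returned so polling continues.
 */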
1599static int enic_poll_msix_wq(struct napi_struct *napi, int budget)
1600{
1601        struct net_device *netdev = napi->dev;
1602        struct enic *enic = netdev_priv(netdev);
1603        unsigned int wq_index = (napi - &enic->napi[0]) - enic->rq_count;
1604        struct vnic_wq *wq = &enic->wq[wq_index];
1605        unsigned int cq;
1606        unsigned int intr;
1607        unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET;
1608        unsigned int wq_work_done;
1609        unsigned int wq_irq;
1610
1611        wq_irq = wq->index;
1612        cq = enic_cq_wq(enic, wq_irq);
1613        intr = enic_msix_wq_intr(enic, wq_irq);
1614        wq_work_done = vnic_cq_service(&enic->cq[cq], wq_work_to_do,
1615                                       enic_wq_service, NULL);
1616
1617        vnic_intr_return_credits(&enic->intr[intr], wq_work_done,
1618                                 0 /* don't unmask intr */,
1619                                 1 /* reset intr timer */);
1620        if (!wq_work_done) {
1621                napi_complete(napi);
1622                vnic_intr_unmask(&enic->intr[intr]);
1623                return 0;
1624        }
1625
1626        return budget;
1627}
1628
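/* Per-RQ NAPI poll for MSI-X mode: the RQ index is derived from the
 * position of the napi context within enic->napi[].  Works like enic_poll()
 * but services a single receive queue and its interrupt.
 */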
1629static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
1630{
1631        struct net_device *netdev = napi->dev;
1632        struct enic *enic = netdev_priv(netdev);
1633        unsigned int rq = (napi - &enic->napi[0]);
1634        unsigned int cq = enic_cq_rq(enic, rq);
1635        unsigned int intr = enic_msix_rq_intr(enic, rq);
1636        unsigned int work_to_do = budget;
1637        unsigned int work_done = 0;
1638        int err;
1639
1640        /* Service RQ
1641         */
1642
1643        if (budget > 0)
1644                work_done = vnic_cq_service(&enic->cq[cq],
1645                        work_to_do, enic_rq_service, NULL);
1646
1647        /* Return intr event credits for this polling
1648         * cycle.  An intr event is the completion of a
1649         * RQ packet.
1650         */
1651
1652        if (work_done > 0)
1653                vnic_intr_return_credits(&enic->intr[intr],
1654                        work_done,
1655                        0 /* don't unmask intr */,
1656                        0 /* don't reset intr timer */);
1657
1658        err = vnic_rq_fill(&enic->rq[rq], enic_rq_alloc_buf);
1659
1660        /* Buffer allocation failed. Stay in polling mode
1661         * so we can try to fill the ring again.
1662         */
1663
1664        if (err)
1665                work_done = work_to_do;
1666        if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
1667                /* Call the function which refreshes the intr coalescing timer
1668                 * value based on the traffic.
1669                 */
1670                enic_calc_int_moderation(enic, &enic->rq[rq]);
1671
1672        if ((work_done < budget) && napi_complete_done(napi, work_done)) {
1673
1674                /* Some work done, but not enough to stay in polling,
1675                 * exit polling
1676                 */
1677
1678                if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
1679                        enic_set_int_moderation(enic, &enic->rq[rq]);
1680                vnic_intr_unmask(&enic->intr[intr]);
1681        }
1682
1683        return work_done;
1684}
1685
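/* Periodic notify poll used when no notify interrupt is armed (MSI mode,
 * see enic_notify_timer_start()): check the notify area and re-arm the
 * timer.
 */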
1686static void enic_notify_timer(struct timer_list *t)
1687{
1688        struct enic *enic = from_timer(enic, t, notify_timer);
1689
1690        enic_notify_check(enic);
1691
1692        mod_timer(&enic->notify_timer,
1693                round_jiffies(jiffies + ENIC_NOTIFY_TIMER_PERIOD));
1694}
1695
1696static void enic_free_intr(struct enic *enic)
1697{
1698        struct net_device *netdev = enic->netdev;
1699        unsigned int i;
1700
1701        enic_free_rx_cpu_rmap(enic);
1702        switch (vnic_dev_get_intr_mode(enic->vdev)) {
1703        case VNIC_DEV_INTR_MODE_INTX:
1704                free_irq(enic->pdev->irq, netdev);
1705                break;
1706        case VNIC_DEV_INTR_MODE_MSI:
1707                free_irq(enic->pdev->irq, enic);
1708                break;
1709        case VNIC_DEV_INTR_MODE_MSIX:
1710                for (i = 0; i < ARRAY_SIZE(enic->msix); i++)
1711                        if (enic->msix[i].requested)
1712                                free_irq(enic->msix_entry[i].vector,
1713                                        enic->msix[i].devid);
1714                break;
1715        default:
1716                break;
1717        }
1718}
1719
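/* Request the IRQs for the current interrupt mode.  In MSI-X mode each
 * vector gets a descriptive name ("<ifname>-rx-N", "-tx-N", "-err",
 * "-notify") and a devid pointing at the matching napi context (for RQ/WQ
 * vectors) or at the enic itself (for the error/notify vectors).
 */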
1720static int enic_request_intr(struct enic *enic)
1721{
1722        struct net_device *netdev = enic->netdev;
1723        unsigned int i, intr;
1724        int err = 0;
1725
1726        enic_set_rx_cpu_rmap(enic);
1727        switch (vnic_dev_get_intr_mode(enic->vdev)) {
1728
1729        case VNIC_DEV_INTR_MODE_INTX:
1730
1731                err = request_irq(enic->pdev->irq, enic_isr_legacy,
1732                        IRQF_SHARED, netdev->name, netdev);
1733                break;
1734
1735        case VNIC_DEV_INTR_MODE_MSI:
1736
1737                err = request_irq(enic->pdev->irq, enic_isr_msi,
1738                        0, netdev->name, enic);
1739                break;
1740
1741        case VNIC_DEV_INTR_MODE_MSIX:
1742
1743                for (i = 0; i < enic->rq_count; i++) {
1744                        intr = enic_msix_rq_intr(enic, i);
1745                        snprintf(enic->msix[intr].devname,
1746                                sizeof(enic->msix[intr].devname),
1747                                "%s-rx-%u", netdev->name, i);
1748                        enic->msix[intr].isr = enic_isr_msix;
1749                        enic->msix[intr].devid = &enic->napi[i];
1750                }
1751
1752                for (i = 0; i < enic->wq_count; i++) {
1753                        int wq = enic_cq_wq(enic, i);
1754
1755                        intr = enic_msix_wq_intr(enic, i);
1756                        snprintf(enic->msix[intr].devname,
1757                                sizeof(enic->msix[intr].devname),
1758                                "%s-tx-%u", netdev->name, i);
1759                        enic->msix[intr].isr = enic_isr_msix;
1760                        enic->msix[intr].devid = &enic->napi[wq];
1761                }
1762
1763                intr = enic_msix_err_intr(enic);
1764                snprintf(enic->msix[intr].devname,
1765                        sizeof(enic->msix[intr].devname),
1766                        "%s-err", netdev->name);
1767                enic->msix[intr].isr = enic_isr_msix_err;
1768                enic->msix[intr].devid = enic;
1769
1770                intr = enic_msix_notify_intr(enic);
1771                snprintf(enic->msix[intr].devname,
1772                        sizeof(enic->msix[intr].devname),
1773                        "%s-notify", netdev->name);
1774                enic->msix[intr].isr = enic_isr_msix_notify;
1775                enic->msix[intr].devid = enic;
1776
1777                for (i = 0; i < ARRAY_SIZE(enic->msix); i++)
1778                        enic->msix[i].requested = 0;
1779
1780                for (i = 0; i < enic->intr_count; i++) {
1781                        err = request_irq(enic->msix_entry[i].vector,
1782                                enic->msix[i].isr, 0,
1783                                enic->msix[i].devname,
1784                                enic->msix[i].devid);
1785                        if (err) {
1786                                enic_free_intr(enic);
1787                                break;
1788                        }
1789                        enic->msix[i].requested = 1;
1790                }
1791
1792                break;
1793
1794        default:
1795                break;
1796        }
1797
1798        return err;
1799}
1800
1801static void enic_synchronize_irqs(struct enic *enic)
1802{
1803        unsigned int i;
1804
1805        switch (vnic_dev_get_intr_mode(enic->vdev)) {
1806        case VNIC_DEV_INTR_MODE_INTX:
1807        case VNIC_DEV_INTR_MODE_MSI:
1808                synchronize_irq(enic->pdev->irq);
1809                break;
1810        case VNIC_DEV_INTR_MODE_MSIX:
1811                for (i = 0; i < enic->intr_count; i++)
1812                        synchronize_irq(enic->msix_entry[i].vector);
1813                break;
1814        default:
1815                break;
1816        }
1817}
1818
1819static void enic_set_rx_coal_setting(struct enic *enic)
1820{
1821        unsigned int speed;
1822        int index = -1;
1823        struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
1824
1825        /* 1. Read the link speed from fw
1826         * 2. Pick the default range for the speed
1827         * 3. Update it in enic->rx_coalesce_setting
1828         */
1829        speed = vnic_dev_port_speed(enic->vdev);
1830        if (ENIC_LINK_SPEED_10G < speed)
1831                index = ENIC_LINK_40G_INDEX;
1832        else if (ENIC_LINK_SPEED_4G < speed)
1833                index = ENIC_LINK_10G_INDEX;
1834        else
1835                index = ENIC_LINK_4G_INDEX;
1836
1837        rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
1838        rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
1839        rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;
1840
1841        /* Start with the value provided by UCSM */
1842        for (index = 0; index < enic->rq_count; index++)
1843                enic->cq[index].cur_rx_coal_timeval =
1844                                enic->config.intr_timer_usec;
1845
1846        rx_coal->use_adaptive_rx_coalesce = 1;
1847}
1848
1849static int enic_dev_notify_set(struct enic *enic)
1850{
1851        int err;
1852
1853        spin_lock_bh(&enic->devcmd_lock);
1854        switch (vnic_dev_get_intr_mode(enic->vdev)) {
1855        case VNIC_DEV_INTR_MODE_INTX:
1856                err = vnic_dev_notify_set(enic->vdev,
1857                        enic_legacy_notify_intr());
1858                break;
1859        case VNIC_DEV_INTR_MODE_MSIX:
1860                err = vnic_dev_notify_set(enic->vdev,
1861                        enic_msix_notify_intr(enic));
1862                break;
1863        default:
1864                err = vnic_dev_notify_set(enic->vdev, -1 /* no intr */);
1865                break;
1866        }
1867        spin_unlock_bh(&enic->devcmd_lock);
1868
1869        return err;
1870}
1871
1872static void enic_notify_timer_start(struct enic *enic)
1873{
1874        switch (vnic_dev_get_intr_mode(enic->vdev)) {
1875        case VNIC_DEV_INTR_MODE_MSI:
1876                mod_timer(&enic->notify_timer, jiffies);
1877                break;
1878        default:
1879                /* Using intr for notification for INTx/MSI-X */
1880                break;
1881        }
1882}
1883
1884/* rtnl lock is held, process context */
1885static int enic_open(struct net_device *netdev)
1886{
1887        struct enic *enic = netdev_priv(netdev);
1888        unsigned int i;
1889        int err, ret;
1890
1891        err = enic_request_intr(enic);
1892        if (err) {
1893                netdev_err(netdev, "Unable to request irq.\n");
1894                return err;
1895        }
1896        enic_init_affinity_hint(enic);
1897        enic_set_affinity_hint(enic);
1898
1899        err = enic_dev_notify_set(enic);
1900        if (err) {
1901                netdev_err(netdev,
1902                        "Failed to alloc notify buffer, aborting.\n");
1903                goto err_out_free_intr;
1904        }
1905
1906        for (i = 0; i < enic->rq_count; i++) {
1907                /* enable rq before updating rq desc */
1908                vnic_rq_enable(&enic->rq[i]);
1909                vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf);
1910                /* Need at least one buffer on ring to get going */
1911                if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
1912                        netdev_err(netdev, "Unable to alloc receive buffers\n");
1913                        err = -ENOMEM;
1914                        goto err_out_free_rq;
1915                }
1916        }
1917
1918        for (i = 0; i < enic->wq_count; i++)
1919                vnic_wq_enable(&enic->wq[i]);
1920
1921        if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
1922                enic_dev_add_station_addr(enic);
1923
1924        enic_set_rx_mode(netdev);
1925
1926        netif_tx_wake_all_queues(netdev);
1927
1928        for (i = 0; i < enic->rq_count; i++)
1929                napi_enable(&enic->napi[i]);
1930
1931        if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
1932                for (i = 0; i < enic->wq_count; i++)
1933                        napi_enable(&enic->napi[enic_cq_wq(enic, i)]);
1934        enic_dev_enable(enic);
1935
1936        for (i = 0; i < enic->intr_count; i++)
1937                vnic_intr_unmask(&enic->intr[i]);
1938
1939        enic_notify_timer_start(enic);
1940        enic_rfs_timer_start(enic);
1941
1942        return 0;
1943
1944err_out_free_rq:
1945        for (i = 0; i < enic->rq_count; i++) {
1946                ret = vnic_rq_disable(&enic->rq[i]);
1947                if (!ret)
1948                        vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1949        }
1950        enic_dev_notify_unset(enic);
1951err_out_free_intr:
1952        enic_unset_affinity_hint(enic);
1953        enic_free_intr(enic);
1954
1955        return err;
1956}
1957
1958/* rtnl lock is held, process context */
1959static int enic_stop(struct net_device *netdev)
1960{
1961        struct enic *enic = netdev_priv(netdev);
1962        unsigned int i;
1963        int err;
1964
1965        for (i = 0; i < enic->intr_count; i++) {
1966                vnic_intr_mask(&enic->intr[i]);
1967                (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1968        }
1969
1970        enic_synchronize_irqs(enic);
1971
1972        del_timer_sync(&enic->notify_timer);
1973        enic_rfs_flw_tbl_free(enic);
1974
1975        enic_dev_disable(enic);
1976
1977        for (i = 0; i < enic->rq_count; i++)
1978                napi_disable(&enic->napi[i]);
1979
1980        netif_carrier_off(netdev);
1981        if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
1982                for (i = 0; i < enic->wq_count; i++)
1983                        napi_disable(&enic->napi[enic_cq_wq(enic, i)]);
1984        netif_tx_disable(netdev);
1985
1986        if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
1987                enic_dev_del_station_addr(enic);
1988
1989        for (i = 0; i < enic->wq_count; i++) {
1990                err = vnic_wq_disable(&enic->wq[i]);
1991                if (err)
1992                        return err;
1993        }
1994        for (i = 0; i < enic->rq_count; i++) {
1995                err = vnic_rq_disable(&enic->rq[i]);
1996                if (err)
1997                        return err;
1998        }
1999
2000        enic_dev_notify_unset(enic);
2001        enic_unset_affinity_hint(enic);
2002        enic_free_intr(enic);
2003
2004        for (i = 0; i < enic->wq_count; i++)
2005                vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
2006        for (i = 0; i < enic->rq_count; i++)
2007                vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
2008        for (i = 0; i < enic->cq_count; i++)
2009                vnic_cq_clean(&enic->cq[i]);
2010        for (i = 0; i < enic->intr_count; i++)
2011                vnic_intr_clean(&enic->intr[i]);
2012
2013        return 0;
2014}
2015
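/* Apply a new MTU under RTNL.  If the interface is running, it is stopped
 * before the change and reopened afterwards so the change takes effect on
 * a quiesced device.
 */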
2016static int _enic_change_mtu(struct net_device *netdev, int new_mtu)
2017{
2018        bool running = netif_running(netdev);
2019        int err = 0;
2020
2021        ASSERT_RTNL();
2022        if (running) {
2023                err = enic_stop(netdev);
2024                if (err)
2025                        return err;
2026        }
2027
2028        netdev->mtu = new_mtu;
2029
2030        if (running) {
2031                err = enic_open(netdev);
2032                if (err)
2033                        return err;
2034        }
2035
2036        return 0;
2037}
2038
2039static int enic_change_mtu(struct net_device *netdev, int new_mtu)
2040{
2041        struct enic *enic = netdev_priv(netdev);
2042
2043        if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
2044                return -EOPNOTSUPP;
2045
2046        if (netdev->mtu > enic->port_mtu)
2047                netdev_warn(netdev,
2048                            "interface MTU (%d) set higher than port MTU (%d)\n",
2049                            netdev->mtu, enic->port_mtu);
2050
2051        return _enic_change_mtu(netdev, new_mtu);
2052}
2053
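/* Worker that applies an MTU update reported by the device (vnic_dev_mtu())
 * using the same stop/set/open sequence, under rtnl_lock.
 */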
2054static void enic_change_mtu_work(struct work_struct *work)
2055{
2056        struct enic *enic = container_of(work, struct enic, change_mtu_work);
2057        struct net_device *netdev = enic->netdev;
2058        int new_mtu = vnic_dev_mtu(enic->vdev);
2059
2060        rtnl_lock();
2061        (void)_enic_change_mtu(netdev, new_mtu);
2062        rtnl_unlock();
2063
2064        netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu);
2065}
2066
2067#ifdef CONFIG_NET_POLL_CONTROLLER
2068static void enic_poll_controller(struct net_device *netdev)
2069{
2070        struct enic *enic = netdev_priv(netdev);
2071        struct vnic_dev *vdev = enic->vdev;
2072        unsigned int i, intr;
2073
2074        switch (vnic_dev_get_intr_mode(vdev)) {
2075        case VNIC_DEV_INTR_MODE_MSIX:
2076                for (i = 0; i < enic->rq_count; i++) {
2077                        intr = enic_msix_rq_intr(enic, i);
2078                        enic_isr_msix(enic->msix_entry[intr].vector,
2079                                      &enic->napi[i]);
2080                }
2081
2082                for (i = 0; i < enic->wq_count; i++) {
2083                        intr = enic_msix_wq_intr(enic, i);
2084                        enic_isr_msix(enic->msix_entry[intr].vector,
2085                                      &enic->napi[enic_cq_wq(enic, i)]);
2086                }
2087
2088                break;
2089        case VNIC_DEV_INTR_MODE_MSI:
2090                enic_isr_msi(enic->pdev->irq, enic);
2091                break;
2092        case VNIC_DEV_INTR_MODE_INTX:
2093                enic_isr_legacy(enic->pdev->irq, netdev);
2094                break;
2095        default:
2096                break;
2097        }
2098}
2099#endif
2100
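/* Start an operation via @start, then poll @finished every 100 ms until it
 * reports completion; give up after two seconds.
 */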
2101static int enic_dev_wait(struct vnic_dev *vdev,
2102        int (*start)(struct vnic_dev *, int),
2103        int (*finished)(struct vnic_dev *, int *),
2104        int arg)
2105{
2106        unsigned long time;
2107        int done;
2108        int err;
2109
2110        err = start(vdev, arg);
2111        if (err)
2112                return err;
2113
2114        /* Wait for func to complete...2 seconds max
2115         */
2116
2117        time = jiffies + (HZ * 2);
2118        do {
2119
2120                err = finished(vdev, &done);
2121                if (err)
2122                        return err;
2123
2124                if (done)
2125                        return 0;
2126
2127                schedule_timeout_uninterruptible(HZ / 10);
2128
2129        } while (time_after(time, jiffies));
2130
2131        return -ETIMEDOUT;
2132}
2133
2134static int enic_dev_open(struct enic *enic)
2135{
2136        int err;
2137        u32 flags = CMD_OPENF_IG_DESCCACHE;
2138
2139        err = enic_dev_wait(enic->vdev, vnic_dev_open,
2140                vnic_dev_open_done, flags);
2141        if (err)
2142                dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n",
2143                        err);
2144
2145        return err;
2146}
2147
2148static int enic_dev_soft_reset(struct enic *enic)
2149{
2150        int err;
2151
2152        err = enic_dev_wait(enic->vdev, vnic_dev_soft_reset,
2153                            vnic_dev_soft_reset_done, 0);
2154        if (err)
2155                netdev_err(enic->netdev, "vNIC soft reset failed, err %d\n",
2156                           err);
2157
2158        return err;
2159}
2160
2161static int enic_dev_hang_reset(struct enic *enic)
2162{
2163        int err;
2164
2165        err = enic_dev_wait(enic->vdev, vnic_dev_hang_reset,
2166                vnic_dev_hang_reset_done, 0);
2167        if (err)
2168                netdev_err(enic->netdev, "vNIC hang reset failed, err %d\n",
2169                        err);
2170
2171        return err;
2172}
2173
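/* Copy the host RSS key into a DMA-coherent buffer, split into
 * ENIC_RSS_BYTES_PER_KEY-byte sub-keys as the firmware expects, and program
 * it into the device under the devcmd lock.
 */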
2174int __enic_set_rsskey(struct enic *enic)
2175{
2176        union vnic_rss_key *rss_key_buf_va;
2177        dma_addr_t rss_key_buf_pa;
2178        int i, kidx, bidx, err;
2179
2180        rss_key_buf_va = dma_alloc_coherent(&enic->pdev->dev,
2181                                            sizeof(union vnic_rss_key),
2182                                            &rss_key_buf_pa, GFP_ATOMIC);
2183        if (!rss_key_buf_va)
2184                return -ENOMEM;
2185
2186        for (i = 0; i < ENIC_RSS_LEN; i++) {
2187                kidx = i / ENIC_RSS_BYTES_PER_KEY;
2188                bidx = i % ENIC_RSS_BYTES_PER_KEY;
2189                rss_key_buf_va->key[kidx].b[bidx] = enic->rss_key[i];
2190        }
2191        spin_lock_bh(&enic->devcmd_lock);
2192        err = enic_set_rss_key(enic,
2193                rss_key_buf_pa,
2194                sizeof(union vnic_rss_key));
2195        spin_unlock_bh(&enic->devcmd_lock);
2196
2197        dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_key),
2198                          rss_key_buf_va, rss_key_buf_pa);
2199
2200        return err;
2201}
2202
2203static int enic_set_rsskey(struct enic *enic)
2204{
2205        netdev_rss_key_fill(enic->rss_key, ENIC_RSS_LEN);
2206
2207        return __enic_set_rsskey(enic);
2208}
2209
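/* Build the RSS indirection table: 2^rss_hash_bits entries spread
 * round-robin across the enabled receive queues, pushed to the device from
 * a DMA-coherent buffer under the devcmd lock.
 */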
2210static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits)
2211{
2212        dma_addr_t rss_cpu_buf_pa;
2213        union vnic_rss_cpu *rss_cpu_buf_va = NULL;
2214        unsigned int i;
2215        int err;
2216
2217        rss_cpu_buf_va = dma_alloc_coherent(&enic->pdev->dev,
2218                                            sizeof(union vnic_rss_cpu),
2219                                            &rss_cpu_buf_pa, GFP_ATOMIC);
2220        if (!rss_cpu_buf_va)
2221                return -ENOMEM;
2222
2223        for (i = 0; i < (1 << rss_hash_bits); i++)
2224                (*rss_cpu_buf_va).cpu[i/4].b[i%4] = i % enic->rq_count;
2225
2226        spin_lock_bh(&enic->devcmd_lock);
2227        err = enic_set_rss_cpu(enic,
2228                rss_cpu_buf_pa,
2229                sizeof(union vnic_rss_cpu));
2230        spin_unlock_bh(&enic->devcmd_lock);
2231
2232        dma_free_coherent(&enic->pdev->dev, sizeof(union vnic_rss_cpu),
2233                          rss_cpu_buf_va, rss_cpu_buf_pa);
2234
2235        return err;
2236}
2237
2238static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
2239        u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
2240{
2241        const u8 tso_ipid_split_en = 0;
2242        const u8 ig_vlan_strip_en = 1;
2243        int err;
2244
2245        /* Enable VLAN tag stripping.
2246         */
2247
2248        spin_lock_bh(&enic->devcmd_lock);
2249        err = enic_set_nic_cfg(enic,
2250                rss_default_cpu, rss_hash_type,
2251                rss_hash_bits, rss_base_cpu,
2252                rss_enable, tso_ipid_split_en,
2253                ig_vlan_strip_en);
2254        spin_unlock_bh(&enic->devcmd_lock);
2255
2256        return err;
2257}
2258
2259static int enic_set_rss_nic_cfg(struct enic *enic)
2260{
2261        struct device *dev = enic_get_dev(enic);
2262        const u8 rss_default_cpu = 0;
2263        const u8 rss_hash_bits = 7;
2264        const u8 rss_base_cpu = 0;
2265        u8 rss_hash_type;
2266        int res;
2267        u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);
2268
2269        spin_lock_bh(&enic->devcmd_lock);
2270        res = vnic_dev_capable_rss_hash_type(enic->vdev, &rss_hash_type);
2271        spin_unlock_bh(&enic->devcmd_lock);
2272        if (res) {
2273                /* defaults for old adapters
2274                 */
2275                rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4      |
2276                                NIC_CFG_RSS_HASH_TYPE_TCP_IPV4  |
2277                                NIC_CFG_RSS_HASH_TYPE_IPV6      |
2278                                NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
2279        }
2280
2281        if (rss_enable) {
2282                if (!enic_set_rsskey(enic)) {
2283                        if (enic_set_rsscpu(enic, rss_hash_bits)) {
2284                                rss_enable = 0;
2285                                dev_warn(dev, "RSS disabled, "
2286                                        "Failed to set RSS cpu indirection table.\n");
2287                        }
2288                } else {
2289                        rss_enable = 0;
2290                        dev_warn(dev, "RSS disabled, Failed to set RSS key.\n");
2291                }
2292        }
2293
2294        return enic_set_niccfg(enic, rss_default_cpu, rss_hash_type,
2295                rss_hash_bits, rss_base_cpu, rss_enable);
2296}
2297
2298static void enic_set_api_busy(struct enic *enic, bool busy)
2299{
2300        spin_lock(&enic->enic_api_lock);
2301        enic->enic_api_busy = busy;
2302        spin_unlock(&enic->enic_api_lock);
2303}
2304
2305static void enic_reset(struct work_struct *work)
2306{
2307        struct enic *enic = container_of(work, struct enic, reset);
2308
2309        if (!netif_running(enic->netdev))
2310                return;
2311
2312        rtnl_lock();
2313
2314        /* Stop any activity from infiniband */
2315        enic_set_api_busy(enic, true);
2316
2317        enic_stop(enic->netdev);
2318        enic_dev_soft_reset(enic);
2319        enic_reset_addr_lists(enic);
2320        enic_init_vnic_resources(enic);
2321        enic_set_rss_nic_cfg(enic);
2322        enic_dev_set_ig_vlan_rewrite_mode(enic);
2323        enic_open(enic->netdev);
2324
2325        /* Allow infiniband to fiddle with the device again */
2326        enic_set_api_busy(enic, false);
2327
2328        call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev);
2329
2330        rtnl_unlock();
2331}
2332
2333static void enic_tx_hang_reset(struct work_struct *work)
2334{
2335        struct enic *enic = container_of(work, struct enic, tx_hang_reset);
2336
2337        rtnl_lock();
2338
2339        /* Stop any activity from infiniband */
2340        enic_set_api_busy(enic, true);
2341
2342        enic_dev_hang_notify(enic);
2343        enic_stop(enic->netdev);
2344        enic_dev_hang_reset(enic);
2345        enic_reset_addr_lists(enic);
2346        enic_init_vnic_resources(enic);
2347        enic_set_rss_nic_cfg(enic);
2348        enic_dev_set_ig_vlan_rewrite_mode(enic);
2349        enic_open(enic->netdev);
2350
2351        /* Allow infiniband to fiddle with the device again */
2352        enic_set_api_busy(enic, false);
2353
2354        call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev);
2355
2356        rtnl_unlock();
2357}
2358
2359static int enic_set_intr_mode(struct enic *enic)
2360{
2361        unsigned int n = min_t(unsigned int, enic->rq_count, ENIC_RQ_MAX);
2362        unsigned int m = min_t(unsigned int, enic->wq_count, ENIC_WQ_MAX);
2363        unsigned int i;
2364
2365        /* Set interrupt mode (INTx, MSI, MSI-X) depending
2366         * on system capabilities.
2367         *
2368         * Try MSI-X first
2369         *
2370         * We need n RQs, m WQs, n+m CQs, and n+m+2 INTRs
2371         * (the second to last INTR is used for WQ/RQ errors)
2372         * (the last INTR is used for notifications)
2373         */
2374
2375        BUG_ON(ARRAY_SIZE(enic->msix_entry) < n + m + 2);
2376        for (i = 0; i < n + m + 2; i++)
2377                enic->msix_entry[i].entry = i;
2378
2379        /* Use multiple RQs if RSS is enabled
2380         */
2381
2382        if (ENIC_SETTING(enic, RSS) &&
2383            enic->config.intr_mode < 1 &&
2384            enic->rq_count >= n &&
2385            enic->wq_count >= m &&
2386            enic->cq_count >= n + m &&
2387            enic->intr_count >= n + m + 2) {
2388
2389                if (pci_enable_msix_range(enic->pdev, enic->msix_entry,
2390                                          n + m + 2, n + m + 2) > 0) {
2391
2392                        enic->rq_count = n;
2393                        enic->wq_count = m;
2394                        enic->cq_count = n + m;
2395                        enic->intr_count = n + m + 2;
2396
2397                        vnic_dev_set_intr_mode(enic->vdev,
2398                                VNIC_DEV_INTR_MODE_MSIX);
2399
2400                        return 0;
2401                }
2402        }
2403
2404        if (enic->config.intr_mode < 1 &&
2405            enic->rq_count >= 1 &&
2406            enic->wq_count >= m &&
2407            enic->cq_count >= 1 + m &&
2408            enic->intr_count >= 1 + m + 2) {
2409                if (pci_enable_msix_range(enic->pdev, enic->msix_entry,
2410                                          1 + m + 2, 1 + m + 2) > 0) {
2411
2412                        enic->rq_count = 1;
2413                        enic->wq_count = m;
2414                        enic->cq_count = 1 + m;
2415                        enic->intr_count = 1 + m + 2;
2416
2417                        vnic_dev_set_intr_mode(enic->vdev,
2418                                VNIC_DEV_INTR_MODE_MSIX);
2419
2420                        return 0;
2421                }
2422        }
2423
2424        /* Next try MSI
2425         *
2426         * We need 1 RQ, 1 WQ, 2 CQs, and 1 INTR
2427         */
2428
2429        if (enic->config.intr_mode < 2 &&
2430            enic->rq_count >= 1 &&
2431            enic->wq_count >= 1 &&
2432            enic->cq_count >= 2 &&
2433            enic->intr_count >= 1 &&
2434            !pci_enable_msi(enic->pdev)) {
2435
2436                enic->rq_count = 1;
2437                enic->wq_count = 1;
2438                enic->cq_count = 2;
2439                enic->intr_count = 1;
2440
2441                vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSI);
2442
2443                return 0;
2444        }
2445
2446        /* Next try INTx
2447         *
2448         * We need 1 RQ, 1 WQ, 2 CQs, and 3 INTRs
2449         * (the first INTR is used for WQ/RQ)
2450         * (the second INTR is used for WQ/RQ errors)
2451         * (the last INTR is used for notifications)
2452         */
2453
2454        if (enic->config.intr_mode < 3 &&
2455            enic->rq_count >= 1 &&
2456            enic->wq_count >= 1 &&
2457            enic->cq_count >= 2 &&
2458            enic->intr_count >= 3) {
2459
2460                enic->rq_count = 1;
2461                enic->wq_count = 1;
2462                enic->cq_count = 2;
2463                enic->intr_count = 3;
2464
2465                vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_INTX);
2466
2467                return 0;
2468        }
2469
2470        vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN);
2471
2472        return -EINVAL;
2473}
2474
2475static void enic_clear_intr_mode(struct enic *enic)
2476{
2477        switch (vnic_dev_get_intr_mode(enic->vdev)) {
2478        case VNIC_DEV_INTR_MODE_MSIX:
2479                pci_disable_msix(enic->pdev);
2480                break;
2481        case VNIC_DEV_INTR_MODE_MSI:
2482                pci_disable_msi(enic->pdev);
2483                break;
2484        default:
2485                break;
2486        }
2487
2488        vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN);
2489}
2490
2491static const struct net_device_ops enic_netdev_dynamic_ops = {
2492        .ndo_open               = enic_open,
2493        .ndo_stop               = enic_stop,
2494        .ndo_start_xmit         = enic_hard_start_xmit,
2495        .ndo_get_stats64        = enic_get_stats,
2496        .ndo_validate_addr      = eth_validate_addr,
2497        .ndo_set_rx_mode        = enic_set_rx_mode,
2498        .ndo_set_mac_address    = enic_set_mac_address_dynamic,
2499        .ndo_change_mtu         = enic_change_mtu,
2500        .ndo_vlan_rx_add_vid    = enic_vlan_rx_add_vid,
2501        .ndo_vlan_rx_kill_vid   = enic_vlan_rx_kill_vid,
2502        .ndo_tx_timeout         = enic_tx_timeout,
2503        .ndo_set_vf_port        = enic_set_vf_port,
2504        .ndo_get_vf_port        = enic_get_vf_port,
2505        .ndo_set_vf_mac         = enic_set_vf_mac,
2506#ifdef CONFIG_NET_POLL_CONTROLLER
2507        .ndo_poll_controller    = enic_poll_controller,
2508#endif
2509#ifdef CONFIG_RFS_ACCEL
2510        .ndo_rx_flow_steer      = enic_rx_flow_steer,
2511#endif
2512        .ndo_udp_tunnel_add     = udp_tunnel_nic_add_port,
2513        .ndo_udp_tunnel_del     = udp_tunnel_nic_del_port,
2514        .ndo_features_check     = enic_features_check,
2515};
2516
2517static const struct net_device_ops enic_netdev_ops = {
2518        .ndo_open               = enic_open,
2519        .ndo_stop               = enic_stop,
2520        .ndo_start_xmit         = enic_hard_start_xmit,
2521        .ndo_get_stats64        = enic_get_stats,
2522        .ndo_validate_addr      = eth_validate_addr,
2523        .ndo_set_mac_address    = enic_set_mac_address,
2524        .ndo_set_rx_mode        = enic_set_rx_mode,
2525        .ndo_change_mtu         = enic_change_mtu,
2526        .ndo_vlan_rx_add_vid    = enic_vlan_rx_add_vid,
2527        .ndo_vlan_rx_kill_vid   = enic_vlan_rx_kill_vid,
2528        .ndo_tx_timeout         = enic_tx_timeout,
2529        .ndo_set_vf_port        = enic_set_vf_port,
2530        .ndo_get_vf_port        = enic_get_vf_port,
2531        .ndo_set_vf_mac         = enic_set_vf_mac,
2532#ifdef CONFIG_NET_POLL_CONTROLLER
2533        .ndo_poll_controller    = enic_poll_controller,
2534#endif
2535#ifdef CONFIG_RFS_ACCEL
2536        .ndo_rx_flow_steer      = enic_rx_flow_steer,
2537#endif
2538        .ndo_udp_tunnel_add     = udp_tunnel_nic_add_port,
2539        .ndo_udp_tunnel_del     = udp_tunnel_nic_del_port,
2540        .ndo_features_check     = enic_features_check,
2541};
2542
2543static void enic_dev_deinit(struct enic *enic)
2544{
2545        unsigned int i;
2546
2547        for (i = 0; i < enic->rq_count; i++)
2548                __netif_napi_del(&enic->napi[i]);
2549
2550        if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
2551                for (i = 0; i < enic->wq_count; i++)
2552                        __netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]);
2553
2554        /* observe RCU grace period after __netif_napi_del() calls */
2555        synchronize_net();
2556
2557        enic_free_vnic_resources(enic);
2558        enic_clear_intr_mode(enic);
2559        enic_free_affinity_hint(enic);
2560}
2561
2562static void enic_kdump_kernel_config(struct enic *enic)
2563{
2564        if (is_kdump_kernel()) {
2565                dev_info(enic_get_dev(enic), "Running from within kdump kernel. Using minimal resources\n");
2566                enic->rq_count = 1;
2567                enic->wq_count = 1;
2568                enic->config.rq_desc_count = ENIC_MIN_RQ_DESCS;
2569                enic->config.wq_desc_count = ENIC_MIN_WQ_DESCS;
2570                enic->config.mtu = min_t(u16, 1500, enic->config.mtu);
2571        }
2572}
2573
2574static int enic_dev_init(struct enic *enic)
2575{
2576        struct device *dev = enic_get_dev(enic);
2577        struct net_device *netdev = enic->netdev;
2578        unsigned int i;
2579        int err;
2580
2581        /* Get interrupt coalesce timer info */
2582        err = enic_dev_intr_coal_timer_info(enic);
2583        if (err) {
2584                dev_warn(dev, "Using default conversion factor for "
2585                        "interrupt coalesce timer\n");
2586                vnic_dev_intr_coal_timer_info_default(enic->vdev);
2587        }
2588
2589        /* Get vNIC configuration
2590         */
2591
2592        err = enic_get_vnic_config(enic);
2593        if (err) {
2594                dev_err(dev, "Get vNIC configuration failed, aborting\n");
2595                return err;
2596        }
2597
2598        /* Get available resource counts
2599         */
2600
2601        enic_get_res_counts(enic);
2602
2603        /* Modify resource counts if we are running in a kdump kernel
2604         */
2605        enic_kdump_kernel_config(enic);
2606
2607        /* Set interrupt mode based on resource counts and system
2608         * capabilities
2609         */
2610
2611        err = enic_set_intr_mode(enic);
2612        if (err) {
2613                dev_err(dev, "Failed to set intr mode based on resource "
2614                        "counts and system capabilities, aborting\n");
2615                return err;
2616        }
2617
2618        /* Allocate and configure vNIC resources
2619         */
2620
2621        err = enic_alloc_vnic_resources(enic);
2622        if (err) {
2623                dev_err(dev, "Failed to alloc vNIC resources, aborting\n");
2624                goto err_out_free_vnic_resources;
2625        }
2626
2627        enic_init_vnic_resources(enic);
2628
2629        err = enic_set_rss_nic_cfg(enic);
2630        if (err) {
2631                dev_err(dev, "Failed to config nic, aborting\n");
2632                goto err_out_free_vnic_resources;
2633        }
2634
2635        switch (vnic_dev_get_intr_mode(enic->vdev)) {
2636        default:
2637                netif_napi_add(netdev, &enic->napi[0], enic_poll, 64);
2638                break;
2639        case VNIC_DEV_INTR_MODE_MSIX:
2640                for (i = 0; i < enic->rq_count; i++) {
2641                        netif_napi_add(netdev, &enic->napi[i],
2642                                enic_poll_msix_rq, NAPI_POLL_WEIGHT);
2643                }
2644                for (i = 0; i < enic->wq_count; i++)
2645                        netif_napi_add(netdev, &enic->napi[enic_cq_wq(enic, i)],
2646                                       enic_poll_msix_wq, NAPI_POLL_WEIGHT);
2647                break;
2648        }
2649
2650        return 0;
2651
2652err_out_free_vnic_resources:
2653        enic_free_affinity_hint(enic);
2654        enic_clear_intr_mode(enic);
2655        enic_free_vnic_resources(enic);
2656
2657        return err;
2658}
2659
2660static void enic_iounmap(struct enic *enic)
2661{
2662        unsigned int i;
2663
2664        for (i = 0; i < ARRAY_SIZE(enic->bar); i++)
2665                if (enic->bar[i].vaddr)
2666                        iounmap(enic->bar[i].vaddr);
2667}
2668
2669static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2670{
2671        struct device *dev = &pdev->dev;
2672        struct net_device *netdev;
2673        struct enic *enic;
2674        int using_dac = 0;
2675        unsigned int i;
2676        int err;
2677#ifdef CONFIG_PCI_IOV
2678        int pos = 0;
2679#endif
2680        int num_pps = 1;
2681
2682        /* Allocate net device structure and initialize.  Private
2683         * instance data is initialized to zero.
2684         */
2685
2686        netdev = alloc_etherdev_mqs(sizeof(struct enic),
2687                                    ENIC_RQ_MAX, ENIC_WQ_MAX);
2688        if (!netdev)
2689                return -ENOMEM;
2690
2691        pci_set_drvdata(pdev, netdev);
2692
2693        SET_NETDEV_DEV(netdev, &pdev->dev);
2694
2695        enic = netdev_priv(netdev);
2696        enic->netdev = netdev;
2697        enic->pdev = pdev;
2698
2699        /* Setup PCI resources
2700         */
2701
2702        err = pci_enable_device_mem(pdev);
2703        if (err) {
2704                dev_err(dev, "Cannot enable PCI device, aborting\n");
2705                goto err_out_free_netdev;
2706        }
2707
2708        err = pci_request_regions(pdev, DRV_NAME);
2709        if (err) {
2710                dev_err(dev, "Cannot request PCI regions, aborting\n");
2711                goto err_out_disable_device;
2712        }
2713
2714        pci_set_master(pdev);
2715
2716        /* Query PCI controller on system for DMA addressing
2717         * limitation for the device.  Try 47-bit first, and
2718         * fall back to 32-bit.
2719         */
2720
2721        err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(47));
2722        if (err) {
2723                err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
2724                if (err) {
2725                        dev_err(dev, "No usable DMA configuration, aborting\n");
2726                        goto err_out_release_regions;
2727                }
2728                err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
2729                if (err) {
2730                        dev_err(dev, "Unable to obtain %u-bit DMA "
2731                                "for consistent allocations, aborting\n", 32);
2732                        goto err_out_release_regions;
2733                }
2734        } else {
2735                err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(47));
2736                if (err) {
2737                        dev_err(dev, "Unable to obtain %u-bit DMA "
2738                                "for consistent allocations, aborting\n", 47);
2739                        goto err_out_release_regions;
2740                }
2741                using_dac = 1;
2742        }
2743
2744        /* Map vNIC resources from BAR0-5
2745         */
2746
2747        for (i = 0; i < ARRAY_SIZE(enic->bar); i++) {
2748                if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
2749                        continue;
2750                enic->bar[i].len = pci_resource_len(pdev, i);
2751                enic->bar[i].vaddr = pci_iomap(pdev, i, enic->bar[i].len);
2752                if (!enic->bar[i].vaddr) {
2753                        dev_err(dev, "Cannot memory-map BAR %d, aborting\n", i);
2754                        err = -ENODEV;
2755                        goto err_out_iounmap;
2756                }
2757                enic->bar[i].bus_addr = pci_resource_start(pdev, i);
2758        }
2759
2760        /* Register vNIC device
2761         */
2762
2763        enic->vdev = vnic_dev_register(NULL, enic, pdev, enic->bar,
2764                ARRAY_SIZE(enic->bar));
2765        if (!enic->vdev) {
2766                dev_err(dev, "vNIC registration failed, aborting\n");
2767                err = -ENODEV;
2768                goto err_out_iounmap;
2769        }
2770
2771        err = vnic_devcmd_init(enic->vdev);
2772
2773        if (err)
2774                goto err_out_vnic_unregister;
2775
2776#ifdef CONFIG_PCI_IOV
2777        /* Get number of subvnics */
2778        pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
2779        if (pos) {
2780                pci_read_config_word(pdev, pos + PCI_SRIOV_TOTAL_VF,
2781                        &enic->num_vfs);
2782                if (enic->num_vfs) {
2783                        err = pci_enable_sriov(pdev, enic->num_vfs);
2784                        if (err) {
2785                                dev_err(dev, "SRIOV enable failed, aborting."
2786                                        " pci_enable_sriov() returned %d\n",
2787                                        err);
2788                                goto err_out_vnic_unregister;
2789                        }
2790                        enic->priv_flags |= ENIC_SRIOV_ENABLED;
2791                        num_pps = enic->num_vfs;
2792                }
2793        }
2794#endif
2795
2796        /* Allocate structure for port profiles */
2797        enic->pp = kcalloc(num_pps, sizeof(*enic->pp), GFP_KERNEL);
2798        if (!enic->pp) {
2799                err = -ENOMEM;
2800                goto err_out_disable_sriov_pp;
2801        }
2802
2803        /* Issue device open to get device in known state
2804         */
2805
2806        err = enic_dev_open(enic);
2807        if (err) {
2808                dev_err(dev, "vNIC dev open failed, aborting\n");
2809                goto err_out_disable_sriov;
2810        }
2811
2812        /* Setup devcmd lock
2813         */
2814
2815        spin_lock_init(&enic->devcmd_lock);
2816        spin_lock_init(&enic->enic_api_lock);
2817
2818        /*
2819         * Set ingress vlan rewrite mode before vnic initialization
2820         */
2821
2822        err = enic_dev_set_ig_vlan_rewrite_mode(enic);
2823        if (err) {
2824                dev_err(dev,
2825                        "Failed to set ingress vlan rewrite mode, aborting.\n");
2826                goto err_out_dev_close;
2827        }
2828
2829        /* Issue device init to initialize the vnic-to-switch link.
2830         * We'll start with carrier off and wait for link UP
2831         * notification later to turn on carrier.  We don't need
2832         * to wait here for the vnic-to-switch link initialization
2833         * to complete; link UP notification is the indication that
2834         * the process is complete.
2835         */
2836
2837        netif_carrier_off(netdev);
2838
2839        /* Do not call dev_init for a dynamic vnic.
2840         * For a dynamic vnic, init_prov_info will be
2841         * called later by an upper layer.
2842         */
2843
2844        if (!enic_is_dynamic(enic)) {
2845                err = vnic_dev_init(enic->vdev, 0);
2846                if (err) {
2847                        dev_err(dev, "vNIC dev init failed, aborting\n");
2848                        goto err_out_dev_close;
2849                }
2850        }
2851
2852        err = enic_dev_init(enic);
2853        if (err) {
2854                dev_err(dev, "Device initialization failed, aborting\n");
2855                goto err_out_dev_close;
2856        }
2857
2858        netif_set_real_num_tx_queues(netdev, enic->wq_count);
2859        netif_set_real_num_rx_queues(netdev, enic->rq_count);
2860
2861        /* Setup notification timer, HW reset task, and wq locks
2862         */
2863
2864        timer_setup(&enic->notify_timer, enic_notify_timer, 0);
2865
2866        enic_rfs_flw_tbl_init(enic);
2867        enic_set_rx_coal_setting(enic);
2868        INIT_WORK(&enic->reset, enic_reset);
2869        INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset);
2870        INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work);
2871
2872        for (i = 0; i < enic->wq_count; i++)
2873                spin_lock_init(&enic->wq_lock[i]);
2874
2875        /* Register net device
2876         */
2877
2878        enic->port_mtu = enic->config.mtu;
2879
2880        err = enic_set_mac_addr(netdev, enic->mac_addr);
2881        if (err) {
2882                dev_err(dev, "Invalid MAC address, aborting\n");
2883                goto err_out_dev_deinit;
2884        }
2885
2886        enic->tx_coalesce_usecs = enic->config.intr_timer_usec;
2887        /* The rx coalesce time has already been initialized. This value is
2888         * used if adaptive coalescing is turned off.
2889         */
2890        enic->rx_coalesce_usecs = enic->tx_coalesce_usecs;
2891
2892        if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
2893                netdev->netdev_ops = &enic_netdev_dynamic_ops;
2894        else
2895                netdev->netdev_ops = &enic_netdev_ops;
2896
2897        netdev->watchdog_timeo = 2 * HZ;
2898        enic_set_ethtool_ops(netdev);
2899
2900        netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
2901        if (ENIC_SETTING(enic, LOOP)) {
2902                netdev->features &= ~NETIF_F_HW_VLAN_CTAG_TX;
2903                enic->loop_enable = 1;
2904                enic->loop_tag = enic->config.loop_tag;
2905                dev_info(dev, "loopback tag=0x%04x\n", enic->loop_tag);
2906        }
2907        if (ENIC_SETTING(enic, TXCSUM))
2908                netdev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM;
2909        if (ENIC_SETTING(enic, TSO))
2910                netdev->hw_features |= NETIF_F_TSO |
2911                        NETIF_F_TSO6 | NETIF_F_TSO_ECN;
2912        if (ENIC_SETTING(enic, RSS))
2913                netdev->hw_features |= NETIF_F_RXHASH;
2914        if (ENIC_SETTING(enic, RXCSUM))
2915                netdev->hw_features |= NETIF_F_RXCSUM;
2916        if (ENIC_SETTING(enic, VXLAN)) {
2917                u64 patch_level;
2918                u64 a1 = 0;
2919
2920                netdev->hw_enc_features |= NETIF_F_RXCSUM               |
2921                                           NETIF_F_TSO                  |
2922                                           NETIF_F_TSO6                 |
2923                                           NETIF_F_TSO_ECN              |
2924                                           NETIF_F_GSO_UDP_TUNNEL       |
2925                                           NETIF_F_HW_CSUM              |
2926                                           NETIF_F_GSO_UDP_TUNNEL_CSUM;
2927                netdev->hw_features |= netdev->hw_enc_features;
2928                /* get bit mask from hw about supported offload bit level
2929                 * BIT(0) = fw supports patch_level 0
2930                 *          fcoe bit = encap
2931                 *          fcoe_fc_crc_ok = outer csum ok
2932                 * BIT(1) = always set by fw
2933                 * BIT(2) = fw supports patch_level 2
2934                 *          BIT(0) in rss_hash = encap
2935                 *          BIT(1,2) in rss_hash = outer_ip_csum_ok/
2936                 *                                 outer_tcp_csum_ok
2937                 * used in enic_rq_indicate_buf
2938                 */
2939                err = vnic_dev_get_supported_feature_ver(enic->vdev,
2940                                                         VIC_FEATURE_VXLAN,
2941                                                         &patch_level, &a1);
2942                if (err)
2943                        patch_level = 0;
2944                enic->vxlan.flags = (u8)a1;
2945                /* keep only the patch levels supported by the driver
2946                 */
2947                patch_level &= BIT_ULL(0) | BIT_ULL(2);
2948                patch_level = fls(patch_level);
2949                patch_level = patch_level ? patch_level - 1 : 0;
2950                enic->vxlan.patch_level = patch_level;
2951
2952                if (vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) == 1 ||
2953                    enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ) {
2954                        netdev->udp_tunnel_nic_info = &enic_udp_tunnels_v4;
2955                        if (enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6)
2956                                netdev->udp_tunnel_nic_info = &enic_udp_tunnels;
2957                }
2958        }
2959
2960        netdev->features |= netdev->hw_features;
2961        netdev->vlan_features |= netdev->features;
2962
2963#ifdef CONFIG_RFS_ACCEL
2964        netdev->hw_features |= NETIF_F_NTUPLE;
2965#endif
2966
2967        if (using_dac)
2968                netdev->features |= NETIF_F_HIGHDMA;
2969
2970        netdev->priv_flags |= IFF_UNICAST_FLT;
2971
2972        /* MTU range: 68 - 9000 */
2973        netdev->min_mtu = ENIC_MIN_MTU;
2974        netdev->max_mtu = ENIC_MAX_MTU;
2975        netdev->mtu     = enic->port_mtu;
2976
2977        err = register_netdev(netdev);
2978        if (err) {
2979                dev_err(dev, "Cannot register net device, aborting\n");
2980                goto err_out_dev_deinit;
2981        }
2982        enic->rx_copybreak = RX_COPYBREAK_DEFAULT;
2983
2984        return 0;
2985
2986err_out_dev_deinit:
2987        enic_dev_deinit(enic);
2988err_out_dev_close:
2989        vnic_dev_close(enic->vdev);
2990err_out_disable_sriov:
2991        kfree(enic->pp);
2992err_out_disable_sriov_pp:
2993#ifdef CONFIG_PCI_IOV
2994        if (enic_sriov_enabled(enic)) {
2995                pci_disable_sriov(pdev);
2996                enic->priv_flags &= ~ENIC_SRIOV_ENABLED;
2997        }
2998#endif
2999err_out_vnic_unregister:
3000        vnic_dev_unregister(enic->vdev);
3001err_out_iounmap:
3002        enic_iounmap(enic);
3003err_out_release_regions:
3004        pci_release_regions(pdev);
3005err_out_disable_device:
3006        pci_disable_device(pdev);
3007err_out_free_netdev:
3008        free_netdev(netdev);
3009
3010        return err;
3011}
3012
3013static void enic_remove(struct pci_dev *pdev)
3014{
3015        struct net_device *netdev = pci_get_drvdata(pdev);
3016
3017        if (netdev) {
3018                struct enic *enic = netdev_priv(netdev);
3019
3020                cancel_work_sync(&enic->reset);
3021                cancel_work_sync(&enic->change_mtu_work);
3022                unregister_netdev(netdev);
3023                enic_dev_deinit(enic);
3024                vnic_dev_close(enic->vdev);
3025#ifdef CONFIG_PCI_IOV
3026                if (enic_sriov_enabled(enic)) {
3027                        pci_disable_sriov(pdev);
3028                        enic->priv_flags &= ~ENIC_SRIOV_ENABLED;
3029                }
3030#endif
3031                kfree(enic->pp);
3032                vnic_dev_unregister(enic->vdev);
3033                enic_iounmap(enic);
3034                pci_release_regions(pdev);
3035                pci_disable_device(pdev);
3036                free_netdev(netdev);
3037        }
3038}
3039
3040static struct pci_driver enic_driver = {
3041        .name = DRV_NAME,
3042        .id_table = enic_id_table,
3043        .probe = enic_probe,
3044        .remove = enic_remove,
3045};
3046
3047static int __init enic_init_module(void)
3048{
3049        return pci_register_driver(&enic_driver);
3050}
3051
3052static void __exit enic_cleanup_module(void)
3053{
3054        pci_unregister_driver(&enic_driver);
3055}
3056
3057module_init(enic_init_module);
3058module_exit(enic_cleanup_module);
3059