linux/drivers/infiniband/hw/hfi1/vnic_main.c
/*
 * Copyright(c) 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE   1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

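/* setup_vnic_ctxt - allocate rcvhdrq/eager buffers and enable a vnic context */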
static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;
	int ret;

	hfi1_init_ctxt(uctxt->sc);

	uctxt->do_interrupt = &handle_receive_interrupt;

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);

	uctxt->is_vnic = true;
done:
	return ret;
}

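/*
 * allocate_vnic_ctxt - reserve a free receive context from the dynamic
 * allocation range and pair it with an allocated and enabled PIO send
 * context
 */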
static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
			      struct hfi1_ctxtdata **vnic_ctxt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned int ctxt;
	int ret;

	if (dd->flags & HFI1_FROZEN)
		return -EIO;

	for (ctxt = dd->first_dyn_alloc_ctxt;
	     ctxt < dd->num_rcv_contexts; ctxt++)
		if (!dd->rcd[ctxt])
			break;

	if (ctxt == dd->num_rcv_contexts)
		return -EBUSY;

	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node);
	if (!uctxt) {
		dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
		return -ENOMEM;
	}

	uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
			HFI1_CAP_KGET(NODROP_RHQ_FULL) |
			HFI1_CAP_KGET(NODROP_EGR_FULL) |
			HFI1_CAP_KGET(DMA_RTAIL);
	uctxt->seq_cnt = 1;

	/* Allocate and enable a PIO send context */
	uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
			     uctxt->numa_id);

	ret = uctxt->sc ? 0 : -ENOMEM;
	if (ret)
		goto bail;

	dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
		   uctxt->sc->sw_index, uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto bail;

	if (dd->num_msix_entries)
		hfi1_set_vnic_msix_info(uctxt);

	hfi1_stats.sps_ctxts++;
	dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
	*vnic_ctxt = uctxt;

	return ret;
bail:
	/*
	 * hfi1_free_ctxtdata() also releases send_context
	 * structure if uctxt->sc is not null
	 */
	dd->rcd[uctxt->ctxt] = NULL;
	hfi1_free_ctxtdata(dd, uctxt);
	dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
	return ret;
}

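/*
 * deallocate_vnic_ctxt - disable the receive context and release it,
 * returning its send context to the user context pool
 */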
static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
				 struct hfi1_ctxtdata *uctxt)
{
	unsigned long flags;

	dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
	flush_wc();

	if (dd->num_msix_entries)
		hfi1_reset_vnic_msix_info(uctxt);

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
	/*
	 * VNIC contexts are allocated from user context pool.
	 * Release them back to user context pool.
	 *
	 * Reset context integrity checks to default.
	 * (writes to CSRs probably belong in chip.c)
	 */
	write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
			hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
	sc_disable(uctxt->sc);

	dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	dd->rcd[uctxt->ctxt] = NULL;
	uctxt->event_flags = 0;

	hfi1_clear_tids(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	hfi1_stats.sps_ctxts--;
	hfi1_free_ctxtdata(dd, uctxt);
}

void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
	idr_init(&dd->vnic.vesw_idr);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
	idr_destroy(&dd->vnic.vesw_idr);
}

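/*
 * SUM_GRP_COUNTERS - accumulate one counter group of 'qstats' into
 * 'stats' by walking the group's u64 members from 'unicast' through
 * 's_1519_max'
 */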
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
		u64 *src64, *dst64;                            \
		for (src64 = &qstats->x_grp.unicast,           \
			dst64 = &stats->x_grp.unicast;         \
			dst64 <= &stats->x_grp.s_1519_max;) {  \
			*dst64++ += *src64++;                  \
		}                                              \
	} while (0)

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
				   struct opa_vnic_stats *stats)
{
	struct net_device *netdev = vinfo->netdev;
	u8 i;

	/* add tx counters on different queues */
	for (i = 0; i < vinfo->num_tx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
		stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
		stats->tx_drop_state += qstats->tx_drop_state;
		stats->tx_dlid_zero += qstats->tx_dlid_zero;

		SUM_GRP_COUNTERS(stats, qstats, tx_grp);
		stats->netstats.tx_packets += qnstats->tx_packets;
		stats->netstats.tx_bytes += qnstats->tx_bytes;
	}

	/* add rx counters on different queues */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
		stats->netstats.rx_nohandler += qnstats->rx_nohandler;
		stats->rx_drop_state += qstats->rx_drop_state;
		stats->rx_oversize += qstats->rx_oversize;
		stats->rx_runt += qstats->rx_runt;

		SUM_GRP_COUNTERS(stats, qstats, rx_grp);
		stats->netstats.rx_packets += qnstats->rx_packets;
		stats->netstats.rx_bytes += qnstats->rx_bytes;
	}

	stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
				    stats->netstats.tx_carrier_errors +
				    stats->tx_drop_state + stats->tx_dlid_zero;
	stats->netstats.tx_dropped = stats->netstats.tx_errors;

	stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
				    stats->netstats.rx_nohandler +
				    stats->rx_drop_state + stats->rx_oversize +
				    stats->rx_runt;
	stats->netstats.rx_dropped = stats->netstats.rx_errors;

	netdev->stats.tx_packets = stats->netstats.tx_packets;
	netdev->stats.tx_bytes = stats->netstats.tx_bytes;
	netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
	netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
	netdev->stats.tx_errors = stats->netstats.tx_errors;
	netdev->stats.tx_dropped = stats->netstats.tx_dropped;

	netdev->stats.rx_packets = stats->netstats.rx_packets;
	netdev->stats.rx_bytes = stats->netstats.rx_bytes;
	netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
	netdev->stats.multicast = stats->rx_grp.mcastbcast;
	netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
	netdev->stats.rx_errors = stats->netstats.rx_errors;
	netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
				       int len)
{
	/* account for 4 byte FCS */
	if (len >= 1515)
		grp->s_1519_max++;
	else if (len >= 1020)
		grp->s_1024_1518++;
	else if (len >= 508)
		grp->s_512_1023++;
	else if (len >= 252)
		grp->s_256_511++;
	else if (len >= 124)
		grp->s_128_255++;
	else if (len >= 61)
		grp->s_65_127++;
	else
		grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
	u16 vlan_tci;

	stats->netstats.tx_packets++;
	stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(tx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		tx_grp->mcastbcast++;
	else
		tx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		tx_grp->vlan++;
	else
		tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
	u16 vlan_tci;

	stats->netstats.rx_packets++;
	stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(rx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		rx_grp->mcastbcast++;
	else
		rx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		rx_grp->vlan++;
	else
		rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, vstats);
}

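/*
 * create_bypass_pbc - build the PBC (per buffer control) value for a
 * bypass packet: no HCRC insertion, bypass ICRC, credit return, with
 * the given VL and packet length in Dwords
 */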
static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
	u64 pbc;

	pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
		| PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
		| PBC_PACKET_BYPASS
		| ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
		| (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

	return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
				    u8 q_idx)
{
	netif_stop_subqueue(vinfo->netdev, q_idx);
	if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
		return;

	netif_start_subqueue(vinfo->netdev, q_idx);
}

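/*
 * hfi1_netdev_start_xmit - vnic transmit handler; strips the metadata
 * header, builds the PBC and hands the skb to the sdma send path
 */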
static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
					  struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	u8 pad_len, q_idx = skb->queue_mapping;
	struct hfi1_devdata *dd = vinfo->dd;
	struct opa_vnic_skb_mdata *mdata;
	u32 pkt_len, total_len;
	int err = -EINVAL;
	u64 pbc;

	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
	if (unlikely(!netif_oper_up(netdev))) {
		vinfo->stats[q_idx].tx_drop_state++;
		goto tx_finish;
	}

	/* take out meta data */
	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	skb_pull(skb, sizeof(*mdata));
	if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
		vinfo->stats[q_idx].tx_dlid_zero++;
		goto tx_finish;
	}

	/* add tail padding (for 8 byte size alignment) and icrc */
	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
	pad_len += OPA_VNIC_ICRC_TAIL_LEN;

	/*
	 * pkt_len is how much data we have to write, including header and
	 * data. total_len is the length of the packet in Dwords plus the
	 * PBC; it should not include the CRC.
	 */
	pkt_len = (skb->len + pad_len) >> 2;
	total_len = pkt_len + 2; /* PBC + packet */

	pbc = create_bypass_pbc(mdata->vl, total_len);

	skb_get(skb);
	v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
	err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
	if (unlikely(err)) {
		if (err == -ENOMEM)
			vinfo->stats[q_idx].netstats.tx_fifo_errors++;
		else if (err != -EBUSY)
			vinfo->stats[q_idx].netstats.tx_carrier_errors++;
	}
	/* remove the header before updating tx counters */
	skb_pull(skb, OPA_VNIC_HDR_LEN);

	if (unlikely(err == -EBUSY)) {
		hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_BUSY;
	}

tx_finish:
	/* update tx counters */
	hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

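/*
 * hfi1_vnic_select_queue - map the skb to a tx queue by selecting an
 * sdma engine from the metadata entropy and vl
 */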
static u16 hfi1_vnic_select_queue(struct net_device *netdev,
				  struct sk_buff *skb,
				  void *accel_priv,
				  select_queue_fallback_t fallback)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	struct opa_vnic_skb_mdata *mdata;
	struct sdma_engine *sde;

	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
	return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
				      struct sk_buff *skb)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
	int rc = -EFAULT;

	skb_pull(skb, OPA_VNIC_HDR_LEN);

	/* Validate Packet length */
	if (unlikely(skb->len > max_len))
		vinfo->stats[rxq->idx].rx_oversize++;
	else if (unlikely(skb->len < ETH_ZLEN))
		vinfo->stats[rxq->idx].rx_runt++;
	else
		rc = 0;
	return rc;
}

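/* hfi1_vnic_get_skb - dequeue an rx skb and trim its tail padding and icrc */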
static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
	unsigned char *pad_info;
	struct sk_buff *skb;

	skb = skb_dequeue(&rxq->skbq);
	if (unlikely(!skb))
		return NULL;

	/* remove tail padding and icrc */
	pad_info = skb->data + skb->len - 1;
	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
		       ((*pad_info) & 0x7)));

	return skb;
}

/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
				int *work_done, int work_to_do)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	struct sk_buff *skb;
	int rc;

	while (1) {
		if (*work_done >= work_to_do)
			break;

		skb = hfi1_vnic_get_skb(rxq);
		if (unlikely(!skb))
			break;

		rc = hfi1_vnic_decap_skb(rxq, skb);
		/* update rx counters */
		hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
		if (unlikely(rc)) {
			dev_kfree_skb_any(skb);
			continue;
		}

		skb_checksum_none_assert(skb);
		skb->protocol = eth_type_trans(skb, rxq->netdev);

		napi_gro_receive(&rxq->napi, skb);
		(*work_done)++;
	}
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
	struct hfi1_vnic_rx_queue *rxq = container_of(napi,
					      struct hfi1_vnic_rx_queue, napi);
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int work_done = 0;

	v_dbg("napi %d budget %d\n", rxq->idx, budget);
	hfi1_vnic_handle_rx(rxq, &work_done, budget);

	v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
	if (work_done < budget)
		napi_complete(napi);

	return work_done;
}

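/*
 * hfi1_vnic_bypass_rcv - receive handler for bypass (vnic) packets;
 * looks up the vport by vesw id, queues a copy of the packet on the
 * context's rx queue and schedules napi
 */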
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
	struct hfi1_devdata *dd = packet->rcd->dd;
	struct hfi1_vnic_vport_info *vinfo = NULL;
	struct hfi1_vnic_rx_queue *rxq;
	struct sk_buff *skb;
	int l4_type, vesw_id = -1;
	u8 q_idx;

	l4_type = HFI1_GET_L4_TYPE(packet->ebuf);
	if (likely(l4_type == OPA_VNIC_L4_ETHR)) {
		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
		vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);

		/*
		 * In case of invalid vesw id, count the error on
		 * the first available vport.
		 */
		if (unlikely(!vinfo)) {
			struct hfi1_vnic_vport_info *vinfo_tmp;
			int id_tmp = 0;

			vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
			if (vinfo_tmp) {
				spin_lock(&vport_cntr_lock);
				vinfo_tmp->stats[0].netstats.rx_nohandler++;
				spin_unlock(&vport_cntr_lock);
			}
		}
	}

	if (unlikely(!vinfo)) {
		dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
			    l4_type, vesw_id, packet->rcd->ctxt);
		return;
	}

	q_idx = packet->rcd->vnic_q_idx;
	rxq = &vinfo->rxq[q_idx];
	if (unlikely(!netif_oper_up(vinfo->netdev))) {
		vinfo->stats[q_idx].rx_drop_state++;
		skb_queue_purge(&rxq->skbq);
		return;
	}

	if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
	if (unlikely(!skb)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	memcpy(skb->data, packet->ebuf, packet->tlen);
	skb_put(skb, packet->tlen);
	skb_queue_tail(&rxq->skbq, skb);

	if (napi_schedule_prep(&rxq->napi)) {
		v_dbg("napi %d scheduling\n", q_idx);
		__napi_schedule(&rxq->napi);
	}
}

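/*
 * hfi1_vnic_up - publish the vport in the vesw idr and enable its rx
 * (napi) and tx queues
 */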
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	struct net_device *netdev = vinfo->netdev;
	int i, rc;

	/* ensure virtual eth switch id is valid */
	if (!vinfo->vesw_id)
		return -EINVAL;

	rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
		       vinfo->vesw_id + 1, GFP_NOWAIT);
	if (rc < 0)
		return rc;

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		skb_queue_head_init(&rxq->skbq);
		napi_enable(&rxq->napi);
	}

	netif_carrier_on(netdev);
	netif_tx_start_all_queues(netdev);
	set_bit(HFI1_VNIC_UP, &vinfo->flags);

	return 0;
}

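/* hfi1_vnic_down - stop the queues, unpublish the vesw id, drop unread skbs */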
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	u8 i;

	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
	netif_carrier_off(vinfo->netdev);
	netif_tx_disable(vinfo->netdev);
	idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);

	/* ensure irqs see the change */
	hfi1_vnic_synchronize_irq(dd);

	/* remove unread skbs */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		napi_disable(&rxq->napi);
		skb_queue_purge(&rxq->skbq);
	}
}

static int hfi1_netdev_open(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	int rc;

	mutex_lock(&vinfo->lock);
	rc = hfi1_vnic_up(vinfo);
	mutex_unlock(&vinfo->lock);
	return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	mutex_lock(&vinfo->lock);
	if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
		hfi1_vnic_down(vinfo);
	mutex_unlock(&vinfo->lock);
	return 0;
}

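/*
 * hfi1_vnic_allot_ctxt - allocate and set up a vnic context, undoing
 * the allocation if setup fails
 */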
static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
				struct hfi1_ctxtdata **vnic_ctxt)
{
	int rc;

	rc = allocate_vnic_ctxt(dd, vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
		return rc;
	}

	rc = setup_vnic_ctxt(dd, *vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
		deallocate_vnic_ctxt(dd, *vnic_ctxt);
		*vnic_ctxt = NULL;
	}

	return rc;
}

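/*
 * hfi1_vnic_init - per-vport initialization; the first vport also sets
 * up the tx resources and the shared vnic receive contexts
 */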
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i, rc = 0;

	mutex_lock(&hfi1_mutex);
	if (!dd->vnic.num_vports) {
		rc = hfi1_vnic_txreq_init(dd);
		if (rc)
			goto txreq_fail;

		dd->vnic.msix_idx = dd->first_dyn_msix_idx;
	}

	for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
		rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
		if (rc)
			break;
		dd->vnic.ctxt[i]->vnic_q_idx = i;
	}

	if (i < vinfo->num_rx_q) {
		/*
		 * If the required number of contexts could not be
		 * allocated, release the contexts that were allocated
		 * by this call.
		 */
		while (i-- > dd->vnic.num_ctxt) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		goto alloc_fail;
	}

	if (dd->vnic.num_ctxt != i) {
		dd->vnic.num_ctxt = i;
		hfi1_init_vnic_rsm(dd);
	}

	dd->vnic.num_vports++;
	hfi1_vnic_sdma_init(vinfo);
alloc_fail:
	if (!dd->vnic.num_vports)
		hfi1_vnic_txreq_deinit(dd);
txreq_fail:
	mutex_unlock(&hfi1_mutex);
	return rc;
}

static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i;

	mutex_lock(&hfi1_mutex);
	if (--dd->vnic.num_vports == 0) {
		for (i = 0; i < dd->vnic.num_ctxt; i++) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		hfi1_deinit_vnic_rsm(dd);
		dd->vnic.num_ctxt = 0;
		hfi1_vnic_txreq_deinit(dd);
	}
	mutex_unlock(&hfi1_mutex);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	bool reopen = false;

	/*
	 * If the vesw_id is being changed and the vnic port is up,
	 * reset the vnic port so that the new vesw_id gets picked up.
	 */
	if (id != vinfo->vesw_id) {
		mutex_lock(&vinfo->lock);
		if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
			hfi1_vnic_down(vinfo);
			reopen = true;
		}

		vinfo->vesw_id = id;
		if (reopen)
			hfi1_vnic_up(vinfo);

		mutex_unlock(&vinfo->lock);
	}
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
	.ndo_open = hfi1_netdev_open,
	.ndo_stop = hfi1_netdev_close,
	.ndo_start_xmit = hfi1_netdev_start_xmit,
	.ndo_select_queue = hfi1_vnic_select_queue,
	.ndo_get_stats64 = hfi1_vnic_get_stats64,
};

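/*
 * hfi1_vnic_alloc_rn - allocate an opa_vnic rdma netdev with one tx
 * queue per sdma engine and one rx queue per vnic context, and
 * initialize the vport
 */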
struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
				      u8 port_num,
				      enum rdma_netdev_t type,
				      const char *name,
				      unsigned char name_assign_type,
				      void (*setup)(struct net_device *))
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	struct hfi1_vnic_vport_info *vinfo;
	struct net_device *netdev;
	struct rdma_netdev *rn;
	int i, size, rc;

	if (!port_num || (port_num > dd->num_pports))
		return ERR_PTR(-EINVAL);

	if (type != RDMA_NETDEV_OPA_VNIC)
		return ERR_PTR(-EOPNOTSUPP);

	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
				  dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(netdev);
	vinfo = opa_vnic_dev_priv(netdev);
	vinfo->dd = dd;
	vinfo->num_tx_q = dd->chip_sdma_engines;
	vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
	vinfo->netdev = netdev;
	rn->set_id = hfi1_vnic_set_vesw_id;

	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
	netdev->hw_features = netdev->features;
	netdev->vlan_features = netdev->features;
	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
	netdev->netdev_ops = &hfi1_netdev_ops;
	mutex_init(&vinfo->lock);

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		rxq->idx = i;
		rxq->vinfo = vinfo;
		rxq->netdev = netdev;
		netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
	}

	rc = hfi1_vnic_init(vinfo);
	if (rc)
		goto init_fail;

	return netdev;
init_fail:
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
	return ERR_PTR(rc);
}

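/* hfi1_vnic_free_rn - undo hfi1_vnic_alloc_rn(); deinit the vport and free */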
void hfi1_vnic_free_rn(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_deinit(vinfo);
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
}