linux/drivers/infiniband/hw/hfi1/vnic_main.c
/*
 * Copyright(c) 2017 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE   1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
        unsigned int rcvctrl_ops = 0;
        int ret;

        uctxt->do_interrupt = &handle_receive_interrupt;

        /* Now allocate the RcvHdr queue and eager buffers. */
        ret = hfi1_create_rcvhdrq(dd, uctxt);
        if (ret)
                goto done;

        ret = hfi1_setup_eagerbufs(uctxt);
        if (ret)
                goto done;

        if (uctxt->rcvhdrtail_kvaddr)
                clear_rcvhdrtail(uctxt);

        rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
        rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

        if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
                rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
                rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
                rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
                rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

        hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
done:
        return ret;
}

static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
                              struct hfi1_ctxtdata **vnic_ctxt)
{
        struct hfi1_ctxtdata *uctxt;
        int ret;

        if (dd->flags & HFI1_FROZEN)
                return -EIO;

        ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
        if (ret < 0) {
                dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
                return -ENOMEM;
        }

        uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
                        HFI1_CAP_KGET(NODROP_RHQ_FULL) |
                        HFI1_CAP_KGET(NODROP_EGR_FULL) |
                        HFI1_CAP_KGET(DMA_RTAIL);
        uctxt->seq_cnt = 1;
        uctxt->is_vnic = true;

        msix_request_rcd_irq(uctxt);

        hfi1_stats.sps_ctxts++;
        dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
        *vnic_ctxt = uctxt;

        return 0;
}

static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
                                 struct hfi1_ctxtdata *uctxt)
{
        dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
        flush_wc();

        /*
         * Disable receive context and interrupt available, reset all
         * RcvCtxtCtrl bits to default values.
         */
        hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
                     HFI1_RCVCTRL_TIDFLOW_DIS |
                     HFI1_RCVCTRL_INTRAVAIL_DIS |
                     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
                     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
                     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);

        /* msix_intr should always be > 0; free the IRQ only if it is */
        if (uctxt->msix_intr)
                msix_free_irq(dd, uctxt->msix_intr);

        uctxt->event_flags = 0;

        hfi1_clear_tids(uctxt);
        hfi1_clear_ctxt_pkey(dd, uctxt);

        hfi1_stats.sps_ctxts--;

        hfi1_free_ctxt(uctxt);
}

void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
        idr_init(&dd->vnic.vesw_idr);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
        idr_destroy(&dd->vnic.vesw_idr);
}

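/*
 * SUM_GRP_COUNTERS relies on the u64 counters in struct opa_vnic_grp_stats
 * being laid out contiguously from .unicast through .s_1519_max; it walks
 * that range pointer-wise, adding each per-queue counter into the total.
 */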
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
                u64 *src64, *dst64;                            \
                for (src64 = &qstats->x_grp.unicast,           \
                        dst64 = &stats->x_grp.unicast;         \
                        dst64 <= &stats->x_grp.s_1519_max;) {  \
                        *dst64++ += *src64++;                  \
                }                                              \
        } while (0)

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
                                   struct opa_vnic_stats *stats)
{
        struct net_device *netdev = vinfo->netdev;
        u8 i;

        /* add tx counters on different queues */
        for (i = 0; i < vinfo->num_tx_q; i++) {
                struct opa_vnic_stats *qstats = &vinfo->stats[i];
                struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

                stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
                stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
                stats->tx_drop_state += qstats->tx_drop_state;
                stats->tx_dlid_zero += qstats->tx_dlid_zero;

                SUM_GRP_COUNTERS(stats, qstats, tx_grp);
                stats->netstats.tx_packets += qnstats->tx_packets;
                stats->netstats.tx_bytes += qnstats->tx_bytes;
        }

        /* add rx counters on different queues */
        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct opa_vnic_stats *qstats = &vinfo->stats[i];
                struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

                stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
                stats->netstats.rx_nohandler += qnstats->rx_nohandler;
                stats->rx_drop_state += qstats->rx_drop_state;
                stats->rx_oversize += qstats->rx_oversize;
                stats->rx_runt += qstats->rx_runt;

                SUM_GRP_COUNTERS(stats, qstats, rx_grp);
                stats->netstats.rx_packets += qnstats->rx_packets;
                stats->netstats.rx_bytes += qnstats->rx_bytes;
        }

        stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
                                    stats->netstats.tx_carrier_errors +
                                    stats->tx_drop_state + stats->tx_dlid_zero;
        stats->netstats.tx_dropped = stats->netstats.tx_errors;

        stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
                                    stats->netstats.rx_nohandler +
                                    stats->rx_drop_state + stats->rx_oversize +
                                    stats->rx_runt;
        stats->netstats.rx_dropped = stats->netstats.rx_errors;

        netdev->stats.tx_packets = stats->netstats.tx_packets;
        netdev->stats.tx_bytes = stats->netstats.tx_bytes;
        netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
        netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
        netdev->stats.tx_errors = stats->netstats.tx_errors;
        netdev->stats.tx_dropped = stats->netstats.tx_dropped;

        netdev->stats.rx_packets = stats->netstats.rx_packets;
        netdev->stats.rx_bytes = stats->netstats.rx_bytes;
        netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
        netdev->stats.multicast = stats->rx_grp.mcastbcast;
        netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
        netdev->stats.rx_errors = stats->netstats.rx_errors;
        netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
                                       int len)
{
        /* account for 4 byte FCS */
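        /*
         * The thresholds below are the frame-size bucket lower bounds
         * (64, 65, 128, 256, 512, 1024, 1519) minus the 4-byte FCS,
         * since len here does not include the FCS; e.g. a 1514-byte
         * frame is 1518 bytes on the wire and lands in s_1024_1518.
         */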
        if (len >= 1515)
                grp->s_1519_max++;
        else if (len >= 1020)
                grp->s_1024_1518++;
        else if (len >= 508)
                grp->s_512_1023++;
        else if (len >= 252)
                grp->s_256_511++;
        else if (len >= 124)
                grp->s_128_255++;
        else if (len >= 61)
                grp->s_65_127++;
        else
                grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
                                         u8 q_idx, struct sk_buff *skb, int err)
{
        struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
        struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
        struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
        u16 vlan_tci;

        stats->netstats.tx_packets++;
        stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

        update_len_counters(tx_grp, skb->len);

        /* rest of the counts are for good packets only */
        if (unlikely(err))
                return;

        if (is_multicast_ether_addr(mac_hdr->h_dest))
                tx_grp->mcastbcast++;
        else
                tx_grp->unicast++;

        if (!__vlan_get_tag(skb, &vlan_tci))
                tx_grp->vlan++;
        else
                tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
                                         u8 q_idx, struct sk_buff *skb, int err)
{
        struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
        struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
        struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
        u16 vlan_tci;

        stats->netstats.rx_packets++;
        stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

        update_len_counters(rx_grp, skb->len);

        /* rest of the counts are for good packets only */
        if (unlikely(err))
                return;

        if (is_multicast_ether_addr(mac_hdr->h_dest))
                rx_grp->mcastbcast++;
        else
                rx_grp->unicast++;

        if (!__vlan_get_tag(skb, &vlan_tci))
                rx_grp->vlan++;
        else
                rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
                                  struct rtnl_link_stats64 *stats)
{
        struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        hfi1_vnic_update_stats(vinfo, vstats);
}

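/*
 * Build the 64-bit PBC control word that precedes each bypass packet
 * handed to the send DMA engine: no HCRC insertion, hardware ICRC for a
 * bypass packet, credit return requested, plus the VL and the packet
 * length in dwords.
 */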
static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
        u64 pbc;

        pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
                | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
                | PBC_PACKET_BYPASS
                | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
                | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

        return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
                                    u8 q_idx)
{
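        /*
         * Stop the queue before re-checking for SDMA descriptor space.
         * Checking after stopping closes the race with a concurrent
         * completion: if space frees up in between, the queue is simply
         * restarted below rather than being left stopped with no one
         * to wake it.
         */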
        netif_stop_subqueue(vinfo->netdev, q_idx);
        if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
                return;

        netif_start_subqueue(vinfo->netdev, q_idx);
}

static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
                                          struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        u8 pad_len, q_idx = skb->queue_mapping;
        struct hfi1_devdata *dd = vinfo->dd;
        struct opa_vnic_skb_mdata *mdata;
        u32 pkt_len, total_len;
        int err = -EINVAL;
        u64 pbc;

        v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
        if (unlikely(!netif_oper_up(netdev))) {
                vinfo->stats[q_idx].tx_drop_state++;
                goto tx_finish;
        }

        /* take out meta data */
        mdata = (struct opa_vnic_skb_mdata *)skb->data;
        skb_pull(skb, sizeof(*mdata));
        if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
                vinfo->stats[q_idx].tx_dlid_zero++;
                goto tx_finish;
        }

        /* add tail padding (for 8-byte size alignment) and icrc */
        pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
        pad_len += OPA_VNIC_ICRC_TAIL_LEN;
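        /*
         * -(x) & 0x7 is the distance from x up to the next multiple of
         * 8; e.g. skb->len + OPA_VNIC_ICRC_TAIL_LEN == 61 yields 3 pad
         * bytes for a 64-byte total. The ICRC tail itself then counts
         * as part of pad_len for the send below.
         */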

        /*
         * pkt_len is how much data we have to write, including header
         * and data. total_len is the length of the packet in dwords
         * plus the PBC, and should not include the CRC.
         */
        pkt_len = (skb->len + pad_len) >> 2;
        total_len = pkt_len + 2; /* PBC + packet */

        pbc = create_bypass_pbc(mdata->vl, total_len);

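        /*
         * Hold an extra reference across the DMA send: the SDMA path
         * consumes one reference on completion, and the skb is still
         * needed below for the tx counter update and final free.
         */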
        skb_get(skb);
        v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
        err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
        if (unlikely(err)) {
                if (err == -ENOMEM)
                        vinfo->stats[q_idx].netstats.tx_fifo_errors++;
                else if (err != -EBUSY)
                        vinfo->stats[q_idx].netstats.tx_carrier_errors++;
        }
        /* remove the header before updating tx counters */
        skb_pull(skb, OPA_VNIC_HDR_LEN);

        if (unlikely(err == -EBUSY)) {
                hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
                dev_kfree_skb_any(skb);
                return NETDEV_TX_BUSY;
        }

tx_finish:
        /* update tx counters */
        hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
}

static u16 hfi1_vnic_select_queue(struct net_device *netdev,
                                  struct sk_buff *skb,
                                  void *accel_priv,
                                  select_queue_fallback_t fallback)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        struct opa_vnic_skb_mdata *mdata;
        struct sdma_engine *sde;

        mdata = (struct opa_vnic_skb_mdata *)skb->data;
        sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
        return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
                                      struct sk_buff *skb)
{
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
        int rc = -EFAULT;

        skb_pull(skb, OPA_VNIC_HDR_LEN);

        /* Validate packet length */
        if (unlikely(skb->len > max_len))
                vinfo->stats[rxq->idx].rx_oversize++;
        else if (unlikely(skb->len < ETH_ZLEN))
                vinfo->stats[rxq->idx].rx_runt++;
        else
                rc = 0;
        return rc;
}

static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
        unsigned char *pad_info;
        struct sk_buff *skb;

        skb = skb_dequeue(&rxq->skbq);
        if (unlikely(!skb))
                return NULL;

        /* remove tail padding and icrc */
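        /*
         * The low 3 bits of the packet's last byte carry the pad count
         * that was added at transmit time; trim that many pad bytes
         * plus the ICRC tail to recover the original frame length.
         */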
        pad_info = skb->data + skb->len - 1;
        skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
                       ((*pad_info) & 0x7)));

        return skb;
}

/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
                                int *work_done, int work_to_do)
{
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        struct sk_buff *skb;
        int rc;

        while (1) {
                if (*work_done >= work_to_do)
                        break;

                skb = hfi1_vnic_get_skb(rxq);
                if (unlikely(!skb))
                        break;

                rc = hfi1_vnic_decap_skb(rxq, skb);
                /* update rx counters */
                hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
                if (unlikely(rc)) {
                        dev_kfree_skb_any(skb);
                        continue;
                }

                skb_checksum_none_assert(skb);
                skb->protocol = eth_type_trans(skb, rxq->netdev);

                napi_gro_receive(&rxq->napi, skb);
                (*work_done)++;
        }
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
        struct hfi1_vnic_rx_queue *rxq = container_of(napi,
                                              struct hfi1_vnic_rx_queue, napi);
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        int work_done = 0;

        v_dbg("napi %d budget %d\n", rxq->idx, budget);
        hfi1_vnic_handle_rx(rxq, &work_done, budget);

        v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
        if (work_done < budget)
                napi_complete(napi);

        return work_done;
}

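/*
 * hfi1_vnic_bypass_rcv - receive entry point for 16B bypass packets.
 * Looks up the vport by the vESW id in the packet header, copies the
 * packet into an skb on the context's rx queue, and kicks NAPI.
 */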
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
        struct hfi1_devdata *dd = packet->rcd->dd;
        struct hfi1_vnic_vport_info *vinfo = NULL;
        struct hfi1_vnic_rx_queue *rxq;
        struct sk_buff *skb;
        int l4_type, vesw_id = -1;
        u8 q_idx;

        l4_type = hfi1_16B_get_l4(packet->ebuf);
        if (likely(l4_type == OPA_16B_L4_ETHR)) {
                vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
                vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);

                /*
                 * In case of invalid vesw id, count the error on
                 * the first available vport.
                 */
                if (unlikely(!vinfo)) {
                        struct hfi1_vnic_vport_info *vinfo_tmp;
                        int id_tmp = 0;

                        vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
                        if (vinfo_tmp) {
                                spin_lock(&vport_cntr_lock);
                                vinfo_tmp->stats[0].netstats.rx_nohandler++;
                                spin_unlock(&vport_cntr_lock);
                        }
                }
        }

        if (unlikely(!vinfo)) {
                dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
                            l4_type, vesw_id, packet->rcd->ctxt);
                return;
        }

        q_idx = packet->rcd->vnic_q_idx;
        rxq = &vinfo->rxq[q_idx];
        if (unlikely(!netif_oper_up(vinfo->netdev))) {
                vinfo->stats[q_idx].rx_drop_state++;
                skb_queue_purge(&rxq->skbq);
                return;
        }

        if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
                vinfo->stats[q_idx].netstats.rx_fifo_errors++;
                return;
        }

        skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
        if (unlikely(!skb)) {
                vinfo->stats[q_idx].netstats.rx_fifo_errors++;
                return;
        }

        memcpy(skb->data, packet->ebuf, packet->tlen);
        skb_put(skb, packet->tlen);
        skb_queue_tail(&rxq->skbq, skb);

        if (napi_schedule_prep(&rxq->napi)) {
                v_dbg("napi %d scheduling\n", q_idx);
                __napi_schedule(&rxq->napi);
        }
}

static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        struct net_device *netdev = vinfo->netdev;
        int i, rc;

        /* ensure virtual eth switch id is valid */
        if (!vinfo->vesw_id)
                return -EINVAL;

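        /* reserve exactly this vesw_id in the idr; fails if already in use */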
        rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
                       vinfo->vesw_id + 1, GFP_NOWAIT);
        if (rc < 0)
                return rc;

        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                skb_queue_head_init(&rxq->skbq);
                napi_enable(&rxq->napi);
        }

        netif_carrier_on(netdev);
        netif_tx_start_all_queues(netdev);
        set_bit(HFI1_VNIC_UP, &vinfo->flags);

        return 0;
}

static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        u8 i;

        clear_bit(HFI1_VNIC_UP, &vinfo->flags);
        netif_carrier_off(vinfo->netdev);
        netif_tx_disable(vinfo->netdev);
        idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);

        /* ensure irqs see the change */
        msix_vnic_synchronize_irq(dd);

        /* remove unread skbs */
        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                napi_disable(&rxq->napi);
                skb_queue_purge(&rxq->skbq);
        }
}

static int hfi1_netdev_open(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        int rc;

        mutex_lock(&vinfo->lock);
        rc = hfi1_vnic_up(vinfo);
        mutex_unlock(&vinfo->lock);
        return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        mutex_lock(&vinfo->lock);
        if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
                hfi1_vnic_down(vinfo);
        mutex_unlock(&vinfo->lock);
        return 0;
}

static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
                                struct hfi1_ctxtdata **vnic_ctxt)
{
        int rc;

        rc = allocate_vnic_ctxt(dd, vnic_ctxt);
        if (rc) {
                dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
                return rc;
        }

        rc = setup_vnic_ctxt(dd, *vnic_ctxt);
        if (rc) {
                dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
                deallocate_vnic_ctxt(dd, *vnic_ctxt);
                *vnic_ctxt = NULL;
        }

        return rc;
}

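/*
 * hfi1_vnic_init - per-vport init. Receive contexts are shared across
 * all vports on the device, so only the contexts beyond dd->vnic.num_ctxt
 * are allocated here. The first vport also sets up the tx request cache,
 * and hfi1_init_vnic_rsm() programs packet steering (presumably directing
 * vnic bypass traffic to the newly added contexts).
 */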
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        int i, rc = 0;

        mutex_lock(&hfi1_mutex);
        if (!dd->vnic.num_vports) {
                rc = hfi1_vnic_txreq_init(dd);
                if (rc)
                        goto txreq_fail;
        }

        for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
                rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
                if (rc)
                        break;
                hfi1_rcd_get(dd->vnic.ctxt[i]);
                dd->vnic.ctxt[i]->vnic_q_idx = i;
        }

        if (i < vinfo->num_rx_q) {
                /*
                 * The required number of contexts could not all be
                 * allocated; release the ones that were allocated in
                 * this call.
                 */
                while (i-- > dd->vnic.num_ctxt) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
                        hfi1_rcd_put(dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                goto alloc_fail;
        }

        if (dd->vnic.num_ctxt != i) {
                dd->vnic.num_ctxt = i;
                hfi1_init_vnic_rsm(dd);
        }

        dd->vnic.num_vports++;
        hfi1_vnic_sdma_init(vinfo);
alloc_fail:
        if (!dd->vnic.num_vports)
                hfi1_vnic_txreq_deinit(dd);
txreq_fail:
        mutex_unlock(&hfi1_mutex);
        return rc;
}

static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        int i;

        mutex_lock(&hfi1_mutex);
        if (--dd->vnic.num_vports == 0) {
                for (i = 0; i < dd->vnic.num_ctxt; i++) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
                        hfi1_rcd_put(dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                hfi1_deinit_vnic_rsm(dd);
                dd->vnic.num_ctxt = 0;
                hfi1_vnic_txreq_deinit(dd);
        }
        mutex_unlock(&hfi1_mutex);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        bool reopen = false;

        /*
         * If the vesw_id is being changed and the vnic port is up,
         * reset the port so that the new vesw_id gets picked up.
         */
        if (id != vinfo->vesw_id) {
                mutex_lock(&vinfo->lock);
                if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
                        hfi1_vnic_down(vinfo);
                        reopen = true;
                }

                vinfo->vesw_id = id;
                if (reopen)
                        hfi1_vnic_up(vinfo);

                mutex_unlock(&vinfo->lock);
        }
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
        .ndo_open = hfi1_netdev_open,
        .ndo_stop = hfi1_netdev_close,
        .ndo_start_xmit = hfi1_netdev_start_xmit,
        .ndo_select_queue = hfi1_vnic_select_queue,
        .ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        hfi1_vnic_deinit(vinfo);
        mutex_destroy(&vinfo->lock);
        free_netdev(netdev);
}

struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
                                      u8 port_num,
                                      enum rdma_netdev_t type,
                                      const char *name,
                                      unsigned char name_assign_type,
                                      void (*setup)(struct net_device *))
{
        struct hfi1_devdata *dd = dd_from_ibdev(device);
        struct hfi1_vnic_vport_info *vinfo;
        struct net_device *netdev;
        struct rdma_netdev *rn;
        int i, size, rc;

        if (!dd->num_vnic_contexts)
                return ERR_PTR(-ENOMEM);

        if (!port_num || (port_num > dd->num_pports))
                return ERR_PTR(-EINVAL);

        if (type != RDMA_NETDEV_OPA_VNIC)
                return ERR_PTR(-EOPNOTSUPP);

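        /*
         * The netdev private area holds the struct opa_vnic_rdma_netdev
         * followed by this driver's vport info; opa_vnic_dev_priv()
         * returns the latter.
         */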
        size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
        netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
                                  dd->num_sdma, dd->num_vnic_contexts);
        if (!netdev)
                return ERR_PTR(-ENOMEM);

        rn = netdev_priv(netdev);
        vinfo = opa_vnic_dev_priv(netdev);
        vinfo->dd = dd;
        vinfo->num_tx_q = dd->num_sdma;
        vinfo->num_rx_q = dd->num_vnic_contexts;
        vinfo->netdev = netdev;
        rn->free_rdma_netdev = hfi1_vnic_free_rn;
        rn->set_id = hfi1_vnic_set_vesw_id;

        netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
        netdev->hw_features = netdev->features;
        netdev->vlan_features = netdev->features;
        netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
        netdev->netdev_ops = &hfi1_netdev_ops;
        mutex_init(&vinfo->lock);

        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                rxq->idx = i;
                rxq->vinfo = vinfo;
                rxq->netdev = netdev;
                netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
        }

        rc = hfi1_vnic_init(vinfo);
        if (rc)
                goto init_fail;

        return netdev;
init_fail:
        mutex_destroy(&vinfo->lock);
        free_netdev(netdev);
        return ERR_PTR(rc);
}