linux/drivers/infiniband/hw/hfi1/vnic_main.c
/*
 * Copyright(c) 2017 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE   1024

#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

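/* setup_vnic_ctxt - allocate receive buffers and enable the VNIC receive context */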
static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
        unsigned int rcvctrl_ops = 0;
        int ret;

        uctxt->do_interrupt = &handle_receive_interrupt;

        /* Now allocate the RcvHdr queue and eager buffers. */
        ret = hfi1_create_rcvhdrq(dd, uctxt);
        if (ret)
                goto done;

        ret = hfi1_setup_eagerbufs(uctxt);
        if (ret)
                goto done;

        if (uctxt->rcvhdrtail_kvaddr)
                clear_rcvhdrtail(uctxt);

        rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
        rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

        if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
                rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
                rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
                rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
        if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
                rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

        hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
done:
        return ret;
}

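/* allocate_vnic_ctxt - create a dedicated receive context for VNIC traffic */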
static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
                              struct hfi1_ctxtdata **vnic_ctxt)
{
        struct hfi1_ctxtdata *uctxt;
        int ret;

        if (dd->flags & HFI1_FROZEN)
                return -EIO;

        ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
        if (ret < 0) {
                dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
                return -ENOMEM;
        }

        uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
                        HFI1_CAP_KGET(NODROP_RHQ_FULL) |
                        HFI1_CAP_KGET(NODROP_EGR_FULL) |
                        HFI1_CAP_KGET(DMA_RTAIL);
        uctxt->seq_cnt = 1;
        uctxt->is_vnic = true;

        msix_request_rcd_irq(uctxt);

        hfi1_stats.sps_ctxts++;
        dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
        *vnic_ctxt = uctxt;

        return 0;
}

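/* deallocate_vnic_ctxt - disable a VNIC receive context and free its resources */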
static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
                                 struct hfi1_ctxtdata *uctxt)
{
        dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
        flush_wc();

        /*
         * Disable receive context and interrupt available, reset all
         * RcvCtxtCtrl bits to default values.
         */
        hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
                     HFI1_RCVCTRL_TIDFLOW_DIS |
                     HFI1_RCVCTRL_INTRAVAIL_DIS |
                     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
                     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
                     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);

        /* msix_intr will always be > 0, only clean up if this is true */
        if (uctxt->msix_intr)
                msix_free_irq(dd, uctxt->msix_intr);

        uctxt->event_flags = 0;

        hfi1_clear_tids(uctxt);
        hfi1_clear_ctxt_pkey(dd, uctxt);

        hfi1_stats.sps_ctxts--;

        hfi1_free_ctxt(uctxt);
}

void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
        xa_init(&dd->vnic.vesws);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
        WARN_ON(!xa_empty(&dd->vnic.vesws));
}

#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
                u64 *src64, *dst64;                            \
                for (src64 = &qstats->x_grp.unicast,           \
                        dst64 = &stats->x_grp.unicast;         \
                        dst64 <= &stats->x_grp.s_1519_max;) {  \
                        *dst64++ += *src64++;                  \
                }                                              \
        } while (0)

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
                                   struct opa_vnic_stats *stats)
{
        struct net_device *netdev = vinfo->netdev;
        u8 i;

        /* add tx counters on different queues */
        for (i = 0; i < vinfo->num_tx_q; i++) {
                struct opa_vnic_stats *qstats = &vinfo->stats[i];
                struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

                stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
                stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
                stats->tx_drop_state += qstats->tx_drop_state;
                stats->tx_dlid_zero += qstats->tx_dlid_zero;

                SUM_GRP_COUNTERS(stats, qstats, tx_grp);
                stats->netstats.tx_packets += qnstats->tx_packets;
                stats->netstats.tx_bytes += qnstats->tx_bytes;
        }

        /* add rx counters on different queues */
        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct opa_vnic_stats *qstats = &vinfo->stats[i];
                struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

                stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
                stats->netstats.rx_nohandler += qnstats->rx_nohandler;
                stats->rx_drop_state += qstats->rx_drop_state;
                stats->rx_oversize += qstats->rx_oversize;
                stats->rx_runt += qstats->rx_runt;

                SUM_GRP_COUNTERS(stats, qstats, rx_grp);
                stats->netstats.rx_packets += qnstats->rx_packets;
                stats->netstats.rx_bytes += qnstats->rx_bytes;
        }

        stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
                                    stats->netstats.tx_carrier_errors +
                                    stats->tx_drop_state + stats->tx_dlid_zero;
        stats->netstats.tx_dropped = stats->netstats.tx_errors;

        stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
                                    stats->netstats.rx_nohandler +
                                    stats->rx_drop_state + stats->rx_oversize +
                                    stats->rx_runt;
        stats->netstats.rx_dropped = stats->netstats.rx_errors;

        netdev->stats.tx_packets = stats->netstats.tx_packets;
        netdev->stats.tx_bytes = stats->netstats.tx_bytes;
        netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
        netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
        netdev->stats.tx_errors = stats->netstats.tx_errors;
        netdev->stats.tx_dropped = stats->netstats.tx_dropped;

        netdev->stats.rx_packets = stats->netstats.rx_packets;
        netdev->stats.rx_bytes = stats->netstats.rx_bytes;
        netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
        netdev->stats.multicast = stats->rx_grp.mcastbcast;
        netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
        netdev->stats.rx_errors = stats->netstats.rx_errors;
        netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
                                       int len)
{
        /* account for 4 byte FCS */
        if (len >= 1515)
                grp->s_1519_max++;
        else if (len >= 1020)
                grp->s_1024_1518++;
        else if (len >= 508)
                grp->s_512_1023++;
        else if (len >= 252)
                grp->s_256_511++;
        else if (len >= 124)
                grp->s_128_255++;
        else if (len >= 61)
                grp->s_65_127++;
        else
                grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
                                         u8 q_idx, struct sk_buff *skb, int err)
{
        struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
        struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
        struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
        u16 vlan_tci;

        stats->netstats.tx_packets++;
        stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

        update_len_counters(tx_grp, skb->len);

        /* rest of the counts are for good packets only */
        if (unlikely(err))
                return;

        if (is_multicast_ether_addr(mac_hdr->h_dest))
                tx_grp->mcastbcast++;
        else
                tx_grp->unicast++;

        if (!__vlan_get_tag(skb, &vlan_tci))
                tx_grp->vlan++;
        else
                tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
                                         u8 q_idx, struct sk_buff *skb, int err)
{
        struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
        struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
        struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
        u16 vlan_tci;

        stats->netstats.rx_packets++;
        stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

        update_len_counters(rx_grp, skb->len);

        /* rest of the counts are for good packets only */
        if (unlikely(err))
                return;

        if (is_multicast_ether_addr(mac_hdr->h_dest))
                rx_grp->mcastbcast++;
        else
                rx_grp->unicast++;

        if (!__vlan_get_tag(skb, &vlan_tci))
                rx_grp->vlan++;
        else
                rx_grp->untagged++;
}

/* This function is overloaded for opa_vnic specific implementation */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
                                  struct rtnl_link_stats64 *stats)
{
        struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        hfi1_vnic_update_stats(vinfo, vstats);
}

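/* create_bypass_pbc - build the PBC for a VNIC bypass packet */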
static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
        u64 pbc;

        pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
                | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
                | PBC_PACKET_BYPASS
                | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
                | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

        return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
                                    u8 q_idx)
{
        netif_stop_subqueue(vinfo->netdev, q_idx);
        if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
                return;

        netif_start_subqueue(vinfo->netdev, q_idx);
}

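/* hfi1_netdev_start_xmit - hand an skb to the VNIC SDMA path for transmit */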
static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
                                          struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        u8 pad_len, q_idx = skb->queue_mapping;
        struct hfi1_devdata *dd = vinfo->dd;
        struct opa_vnic_skb_mdata *mdata;
        u32 pkt_len, total_len;
        int err = -EINVAL;
        u64 pbc;

        v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
        if (unlikely(!netif_oper_up(netdev))) {
                vinfo->stats[q_idx].tx_drop_state++;
                goto tx_finish;
        }

        /* take out meta data */
        mdata = (struct opa_vnic_skb_mdata *)skb->data;
        skb_pull(skb, sizeof(*mdata));
        if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
                vinfo->stats[q_idx].tx_dlid_zero++;
                goto tx_finish;
        }

        /* add tail padding (for 8 byte size alignment) and icrc */
        pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
        pad_len += OPA_VNIC_ICRC_TAIL_LEN;

        /*
         * pkt_len is how much data we have to write; it includes the header
         * and data. total_len is the length of the packet in dwords plus the
         * PBC, and should not include the CRC.
         */
        pkt_len = (skb->len + pad_len) >> 2;
        total_len = pkt_len + 2; /* PBC + packet */

        pbc = create_bypass_pbc(mdata->vl, total_len);

        skb_get(skb);
        v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
        err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
        if (unlikely(err)) {
                if (err == -ENOMEM)
                        vinfo->stats[q_idx].netstats.tx_fifo_errors++;
                else if (err != -EBUSY)
                        vinfo->stats[q_idx].netstats.tx_carrier_errors++;
        }
        /* remove the header before updating tx counters */
        skb_pull(skb, OPA_VNIC_HDR_LEN);

        if (unlikely(err == -EBUSY)) {
                hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
                dev_kfree_skb_any(skb);
                return NETDEV_TX_BUSY;
        }

tx_finish:
        /* update tx counters */
        hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
}

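/* hfi1_vnic_select_queue - pick the tx queue matching the SDMA engine for this VL */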
static u16 hfi1_vnic_select_queue(struct net_device *netdev,
                                  struct sk_buff *skb,
                                  struct net_device *sb_dev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        struct opa_vnic_skb_mdata *mdata;
        struct sdma_engine *sde;

        mdata = (struct opa_vnic_skb_mdata *)skb->data;
        sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
        return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
                                      struct sk_buff *skb)
{
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
        int rc = -EFAULT;

        skb_pull(skb, OPA_VNIC_HDR_LEN);

        /* Validate Packet length */
        if (unlikely(skb->len > max_len))
                vinfo->stats[rxq->idx].rx_oversize++;
        else if (unlikely(skb->len < ETH_ZLEN))
                vinfo->stats[rxq->idx].rx_runt++;
        else
                rc = 0;
        return rc;
}

static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
        unsigned char *pad_info;
        struct sk_buff *skb;

        skb = skb_dequeue(&rxq->skbq);
        if (unlikely(!skb))
                return NULL;

        /* remove tail padding and icrc */
        pad_info = skb->data + skb->len - 1;
        skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
                       ((*pad_info) & 0x7)));

        return skb;
}

/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
                                int *work_done, int work_to_do)
{
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        struct sk_buff *skb;
        int rc;

        while (1) {
                if (*work_done >= work_to_do)
                        break;

                skb = hfi1_vnic_get_skb(rxq);
                if (unlikely(!skb))
                        break;

                rc = hfi1_vnic_decap_skb(rxq, skb);
                /* update rx counters */
                hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
                if (unlikely(rc)) {
                        dev_kfree_skb_any(skb);
                        continue;
                }

                skb_checksum_none_assert(skb);
                skb->protocol = eth_type_trans(skb, rxq->netdev);

                napi_gro_receive(&rxq->napi, skb);
                (*work_done)++;
        }
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
        struct hfi1_vnic_rx_queue *rxq = container_of(napi,
                                              struct hfi1_vnic_rx_queue, napi);
        struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
        int work_done = 0;

        v_dbg("napi %d budget %d\n", rxq->idx, budget);
        hfi1_vnic_handle_rx(rxq, &work_done, budget);

        v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
        if (work_done < budget)
                napi_complete(napi);

        return work_done;
}

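/* hfi1_vnic_bypass_rcv - queue a received VNIC bypass packet and schedule napi */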
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
        struct hfi1_devdata *dd = packet->rcd->dd;
        struct hfi1_vnic_vport_info *vinfo = NULL;
        struct hfi1_vnic_rx_queue *rxq;
        struct sk_buff *skb;
        int l4_type, vesw_id = -1;
        u8 q_idx;

        l4_type = hfi1_16B_get_l4(packet->ebuf);
        if (likely(l4_type == OPA_16B_L4_ETHR)) {
                vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
                vinfo = xa_load(&dd->vnic.vesws, vesw_id);

                /*
                 * In case of invalid vesw id, count the error on
                 * the first available vport.
                 */
                if (unlikely(!vinfo)) {
                        struct hfi1_vnic_vport_info *vinfo_tmp;
                        unsigned long index = 0;

                        vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX,
                                        XA_PRESENT);
                        if (vinfo_tmp) {
                                spin_lock(&vport_cntr_lock);
                                vinfo_tmp->stats[0].netstats.rx_nohandler++;
                                spin_unlock(&vport_cntr_lock);
                        }
                }
        }

        if (unlikely(!vinfo)) {
                dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
                            l4_type, vesw_id, packet->rcd->ctxt);
                return;
        }

        q_idx = packet->rcd->vnic_q_idx;
        rxq = &vinfo->rxq[q_idx];
        if (unlikely(!netif_oper_up(vinfo->netdev))) {
                vinfo->stats[q_idx].rx_drop_state++;
                skb_queue_purge(&rxq->skbq);
                return;
        }

        if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
                vinfo->stats[q_idx].netstats.rx_fifo_errors++;
                return;
        }

        skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
        if (unlikely(!skb)) {
                vinfo->stats[q_idx].netstats.rx_fifo_errors++;
                return;
        }

        memcpy(skb->data, packet->ebuf, packet->tlen);
        skb_put(skb, packet->tlen);
        skb_queue_tail(&rxq->skbq, skb);

        if (napi_schedule_prep(&rxq->napi)) {
                v_dbg("napi %d scheduling\n", q_idx);
                __napi_schedule(&rxq->napi);
        }
}

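/* hfi1_vnic_up - bring the vnic port up and start its queues */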
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        struct net_device *netdev = vinfo->netdev;
        int i, rc;

        /* ensure virtual eth switch id is valid */
        if (!vinfo->vesw_id)
                return -EINVAL;

        rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL);
        if (rc < 0)
                return rc;

        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                skb_queue_head_init(&rxq->skbq);
                napi_enable(&rxq->napi);
        }

        netif_carrier_on(netdev);
        netif_tx_start_all_queues(netdev);
        set_bit(HFI1_VNIC_UP, &vinfo->flags);

        return 0;
}

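/* hfi1_vnic_down - take the vnic port down and drain its receive queues */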
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        u8 i;

        clear_bit(HFI1_VNIC_UP, &vinfo->flags);
        netif_carrier_off(vinfo->netdev);
        netif_tx_disable(vinfo->netdev);
        xa_erase(&dd->vnic.vesws, vinfo->vesw_id);

        /* ensure irqs see the change */
        msix_vnic_synchronize_irq(dd);

        /* remove unread skbs */
        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                napi_disable(&rxq->napi);
                skb_queue_purge(&rxq->skbq);
        }
}

static int hfi1_netdev_open(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        int rc;

        mutex_lock(&vinfo->lock);
        rc = hfi1_vnic_up(vinfo);
        mutex_unlock(&vinfo->lock);
        return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        mutex_lock(&vinfo->lock);
        if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
                hfi1_vnic_down(vinfo);
        mutex_unlock(&vinfo->lock);
        return 0;
}

static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
                                struct hfi1_ctxtdata **vnic_ctxt)
{
        int rc;

        rc = allocate_vnic_ctxt(dd, vnic_ctxt);
        if (rc) {
                dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
                return rc;
        }

        rc = setup_vnic_ctxt(dd, *vnic_ctxt);
        if (rc) {
                dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
                deallocate_vnic_ctxt(dd, *vnic_ctxt);
                *vnic_ctxt = NULL;
        }

        return rc;
}

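/* hfi1_vnic_init - allocate receive contexts and tx resources for a new vport */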
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        int i, rc = 0;

        mutex_lock(&hfi1_mutex);
        if (!dd->vnic.num_vports) {
                rc = hfi1_vnic_txreq_init(dd);
                if (rc)
                        goto txreq_fail;
        }

        for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
                rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
                if (rc)
                        break;
                hfi1_rcd_get(dd->vnic.ctxt[i]);
                dd->vnic.ctxt[i]->vnic_q_idx = i;
        }

        if (i < vinfo->num_rx_q) {
                /*
                 * If the required number of contexts could not be
                 * allocated, release the contexts allocated so far.
                 */
                while (i-- > dd->vnic.num_ctxt) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
                        hfi1_rcd_put(dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                goto alloc_fail;
        }

        if (dd->vnic.num_ctxt != i) {
                dd->vnic.num_ctxt = i;
                hfi1_init_vnic_rsm(dd);
        }

        dd->vnic.num_vports++;
        hfi1_vnic_sdma_init(vinfo);
alloc_fail:
        if (!dd->vnic.num_vports)
                hfi1_vnic_txreq_deinit(dd);
txreq_fail:
        mutex_unlock(&hfi1_mutex);
        return rc;
}

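/* hfi1_vnic_deinit - release VNIC contexts and tx resources when the last vport is removed */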
static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
        struct hfi1_devdata *dd = vinfo->dd;
        int i;

        mutex_lock(&hfi1_mutex);
        if (--dd->vnic.num_vports == 0) {
                for (i = 0; i < dd->vnic.num_ctxt; i++) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
                        hfi1_rcd_put(dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                hfi1_deinit_vnic_rsm(dd);
                dd->vnic.num_ctxt = 0;
                hfi1_vnic_txreq_deinit(dd);
        }
        mutex_unlock(&hfi1_mutex);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
        bool reopen = false;

        /*
         * If vesw_id is being changed, and if the vnic port is up,
         * reset the vnic port to ensure new vesw_id gets picked up
         */
        if (id != vinfo->vesw_id) {
                mutex_lock(&vinfo->lock);
                if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
                        hfi1_vnic_down(vinfo);
                        reopen = true;
                }

                vinfo->vesw_id = id;
                if (reopen)
                        hfi1_vnic_up(vinfo);

                mutex_unlock(&vinfo->lock);
        }
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
        .ndo_open = hfi1_netdev_open,
        .ndo_stop = hfi1_netdev_close,
        .ndo_start_xmit = hfi1_netdev_start_xmit,
        .ndo_select_queue = hfi1_vnic_select_queue,
        .ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
        struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

        hfi1_vnic_deinit(vinfo);
        mutex_destroy(&vinfo->lock);
        free_netdev(netdev);
}

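/* hfi1_vnic_alloc_rn - allocate and initialize the rdma netdev for a vport */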
struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
                                      u8 port_num,
                                      enum rdma_netdev_t type,
                                      const char *name,
                                      unsigned char name_assign_type,
                                      void (*setup)(struct net_device *))
{
        struct hfi1_devdata *dd = dd_from_ibdev(device);
        struct hfi1_vnic_vport_info *vinfo;
        struct net_device *netdev;
        struct rdma_netdev *rn;
        int i, size, rc;

        if (!dd->num_vnic_contexts)
                return ERR_PTR(-ENOMEM);

        if (!port_num || (port_num > dd->num_pports))
                return ERR_PTR(-EINVAL);

        if (type != RDMA_NETDEV_OPA_VNIC)
                return ERR_PTR(-EOPNOTSUPP);

        size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
        netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
                                  dd->num_sdma, dd->num_vnic_contexts);
        if (!netdev)
                return ERR_PTR(-ENOMEM);

        rn = netdev_priv(netdev);
        vinfo = opa_vnic_dev_priv(netdev);
        vinfo->dd = dd;
        vinfo->num_tx_q = dd->num_sdma;
        vinfo->num_rx_q = dd->num_vnic_contexts;
        vinfo->netdev = netdev;
        rn->free_rdma_netdev = hfi1_vnic_free_rn;
        rn->set_id = hfi1_vnic_set_vesw_id;

        netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
        netdev->hw_features = netdev->features;
        netdev->vlan_features = netdev->features;
        netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
        netdev->netdev_ops = &hfi1_netdev_ops;
        mutex_init(&vinfo->lock);

        for (i = 0; i < vinfo->num_rx_q; i++) {
                struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

                rxq->idx = i;
                rxq->vinfo = vinfo;
                rxq->netdev = netdev;
                netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
        }

        rc = hfi1_vnic_init(vinfo);
        if (rc)
                goto init_fail;

        return netdev;
init_fail:
        mutex_destroy(&vinfo->lock);
        free_netdev(netdev);
        return ERR_PTR(rc);
}