linux/drivers/infiniband/sw/rxe/rxe_verbs.c
   1/*
   2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
   3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/dma-mapping.h>
  35#include <net/addrconf.h>
  36#include <rdma/uverbs_ioctl.h>
  37#include "rxe.h"
  38#include "rxe_loc.h"
  39#include "rxe_queue.h"
  40#include "rxe_hw_counters.h"
  41
  42static int rxe_query_device(struct ib_device *dev,
  43                            struct ib_device_attr *attr,
  44                            struct ib_udata *uhw)
  45{
  46        struct rxe_dev *rxe = to_rdev(dev);
  47
  48        if (uhw->inlen || uhw->outlen)
  49                return -EINVAL;
  50
  51        *attr = rxe->attr;
  52        return 0;
  53}
  54
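/* Return the port attributes cached in rxe->port, refresh the active
 * speed/width from the underlying Ethernet device, and derive the
 * physical state from the logical port state and the netdev flags.
 */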
  55static int rxe_query_port(struct ib_device *dev,
  56                          u8 port_num, struct ib_port_attr *attr)
  57{
  58        struct rxe_dev *rxe = to_rdev(dev);
  59        struct rxe_port *port;
  60        int rc;
  61
  62        port = &rxe->port;
  63
   64        /* *attr is zeroed by the caller, so avoid zeroing it here */
  65        *attr = port->attr;
  66
  67        mutex_lock(&rxe->usdev_lock);
  68        rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
  69                              &attr->active_width);
  70
  71        if (attr->state == IB_PORT_ACTIVE)
  72                attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
  73        else if (dev_get_flags(rxe->ndev) & IFF_UP)
  74                attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
  75        else
  76                attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
  77
  78        mutex_unlock(&rxe->usdev_lock);
  79
  80        return rc;
  81}
  82
  83static int rxe_query_pkey(struct ib_device *device,
  84                          u8 port_num, u16 index, u16 *pkey)
  85{
  86        struct rxe_dev *rxe = to_rdev(device);
  87        struct rxe_port *port;
  88
  89        port = &rxe->port;
  90
  91        if (unlikely(index >= port->attr.pkey_tbl_len)) {
  92                dev_warn(device->dev.parent, "invalid index = %d\n",
  93                         index);
  94                goto err1;
  95        }
  96
  97        *pkey = port->pkey_tbl[index];
  98        return 0;
  99
 100err1:
 101        return -EINVAL;
 102}
 103
 104static int rxe_modify_device(struct ib_device *dev,
 105                             int mask, struct ib_device_modify *attr)
 106{
 107        struct rxe_dev *rxe = to_rdev(dev);
 108
 109        if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
 110                rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);
 111
 112        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
 113                memcpy(rxe->ib_dev.node_desc,
 114                       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
 115        }
 116
 117        return 0;
 118}
 119
 120static int rxe_modify_port(struct ib_device *dev,
 121                           u8 port_num, int mask, struct ib_port_modify *attr)
 122{
 123        struct rxe_dev *rxe = to_rdev(dev);
 124        struct rxe_port *port;
 125
 126        port = &rxe->port;
 127
 128        port->attr.port_cap_flags |= attr->set_port_cap_mask;
 129        port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;
 130
 131        if (mask & IB_PORT_RESET_QKEY_CNTR)
 132                port->attr.qkey_viol_cntr = 0;
 133
 134        return 0;
 135}
 136
 137static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
 138                                               u8 port_num)
 139{
 140        struct rxe_dev *rxe = to_rdev(dev);
 141
 142        return rxe_link_layer(rxe, port_num);
 143}
 144
 145static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 146{
 147        struct rxe_dev *rxe = to_rdev(uctx->device);
 148        struct rxe_ucontext *uc = to_ruc(uctx);
 149
 150        return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem);
 151}
 152
 153static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
 154{
 155        struct rxe_ucontext *uc = to_ruc(ibuc);
 156
 157        rxe_drop_ref(uc);
 158}
 159
 160static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
 161                              struct ib_port_immutable *immutable)
 162{
 163        int err;
 164        struct ib_port_attr attr;
 165
 166        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 167
 168        err = ib_query_port(dev, port_num, &attr);
 169        if (err)
 170                return err;
 171
 172        immutable->pkey_tbl_len = attr.pkey_tbl_len;
 173        immutable->gid_tbl_len = attr.gid_tbl_len;
 174        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 175
 176        return 0;
 177}
 178
 179static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 180{
 181        struct rxe_dev *rxe = to_rdev(ibpd->device);
 182        struct rxe_pd *pd = to_rpd(ibpd);
 183
 184        return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
 185}
 186
 187static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 188{
 189        struct rxe_pd *pd = to_rpd(ibpd);
 190
 191        rxe_drop_ref(pd);
 192}
 193
 194static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr,
 195                         u32 flags, struct ib_udata *udata)
 196
 197{
 198        int err;
 199        struct rxe_dev *rxe = to_rdev(ibah->device);
 200        struct rxe_ah *ah = to_rah(ibah);
 201
 202        err = rxe_av_chk_attr(rxe, attr);
 203        if (err)
 204                return err;
 205
 206        err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem);
 207        if (err)
 208                return err;
 209
 210        rxe_init_av(attr, &ah->av);
 211        return 0;
 212}
 213
 214static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 215{
 216        int err;
 217        struct rxe_dev *rxe = to_rdev(ibah->device);
 218        struct rxe_ah *ah = to_rah(ibah);
 219
 220        err = rxe_av_chk_attr(rxe, attr);
 221        if (err)
 222                return err;
 223
 224        rxe_init_av(attr, &ah->av);
 225        return 0;
 226}
 227
 228static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 229{
 230        struct rxe_ah *ah = to_rah(ibah);
 231
 232        memset(attr, 0, sizeof(*attr));
 233        attr->type = ibah->type;
 234        rxe_av_to_attr(&ah->av, attr);
 235        return 0;
 236}
 237
 238static void rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
 239{
 240        struct rxe_ah *ah = to_rah(ibah);
 241
 242        rxe_drop_ref(ah);
 243}
 244
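/* Build one receive WQE in the producer slot of a receive (or shared
 * receive) queue.  Fails with -ENOMEM when the queue is full and with
 * -EINVAL when the WR carries more SGEs than the queue was created
 * with.  The smp_wmb() orders the WQE writes before the producer
 * index update that makes the WQE visible to the queue's consumer.
 */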
 245static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
 246{
 247        int err;
 248        int i;
 249        u32 length;
 250        struct rxe_recv_wqe *recv_wqe;
 251        int num_sge = ibwr->num_sge;
 252
 253        if (unlikely(queue_full(rq->queue))) {
 254                err = -ENOMEM;
 255                goto err1;
 256        }
 257
 258        if (unlikely(num_sge > rq->max_sge)) {
 259                err = -EINVAL;
 260                goto err1;
 261        }
 262
 263        length = 0;
 264        for (i = 0; i < num_sge; i++)
 265                length += ibwr->sg_list[i].length;
 266
 267        recv_wqe = producer_addr(rq->queue);
 268        recv_wqe->wr_id = ibwr->wr_id;
 269        recv_wqe->num_sge = num_sge;
 270
 271        memcpy(recv_wqe->dma.sge, ibwr->sg_list,
 272               num_sge * sizeof(struct ib_sge));
 273
 274        recv_wqe->dma.length            = length;
 275        recv_wqe->dma.resid             = length;
 276        recv_wqe->dma.num_sge           = num_sge;
 277        recv_wqe->dma.cur_sge           = 0;
 278        recv_wqe->dma.sge_offset        = 0;
 279
 280        /* make sure all changes to the work queue are written before we
 281         * update the producer pointer
 282         */
 283        smp_wmb();
 284
 285        advance_producer(rq->queue);
 286        return 0;
 287
 288err1:
 289        return err;
 290}
 291
 292static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
 293                          struct ib_udata *udata)
 294{
 295        int err;
 296        struct rxe_dev *rxe = to_rdev(ibsrq->device);
 297        struct rxe_pd *pd = to_rpd(ibsrq->pd);
 298        struct rxe_srq *srq = to_rsrq(ibsrq);
 299        struct rxe_create_srq_resp __user *uresp = NULL;
 300
 301        if (udata) {
 302                if (udata->outlen < sizeof(*uresp))
 303                        return -EINVAL;
 304                uresp = udata->outbuf;
 305        }
 306
 307        err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
 308        if (err)
 309                goto err1;
 310
 311        err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem);
 312        if (err)
 313                goto err1;
 314
 315        rxe_add_ref(pd);
 316        srq->pd = pd;
 317
 318        err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
 319        if (err)
 320                goto err2;
 321
 322        return 0;
 323
 324err2:
 325        rxe_drop_ref(pd);
 326        rxe_drop_ref(srq);
 327err1:
 328        return err;
 329}
 330
 331static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 332                          enum ib_srq_attr_mask mask,
 333                          struct ib_udata *udata)
 334{
 335        int err;
 336        struct rxe_srq *srq = to_rsrq(ibsrq);
 337        struct rxe_dev *rxe = to_rdev(ibsrq->device);
 338        struct rxe_modify_srq_cmd ucmd = {};
 339
 340        if (udata) {
 341                if (udata->inlen < sizeof(ucmd))
 342                        return -EINVAL;
 343
 344                err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
 345                if (err)
 346                        return err;
 347        }
 348
 349        err = rxe_srq_chk_attr(rxe, srq, attr, mask);
 350        if (err)
 351                goto err1;
 352
 353        err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
 354        if (err)
 355                goto err1;
 356
 357        return 0;
 358
 359err1:
 360        return err;
 361}
 362
 363static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
 364{
 365        struct rxe_srq *srq = to_rsrq(ibsrq);
 366
 367        if (srq->error)
 368                return -EINVAL;
 369
 370        attr->max_wr = srq->rq.queue->buf->index_mask;
 371        attr->max_sge = srq->rq.max_sge;
 372        attr->srq_limit = srq->limit;
 373        return 0;
 374}
 375
 376static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
 377{
 378        struct rxe_srq *srq = to_rsrq(ibsrq);
 379
 380        if (srq->rq.queue)
 381                rxe_queue_cleanup(srq->rq.queue);
 382
 383        rxe_drop_ref(srq->pd);
 384        rxe_drop_ref(srq);
 385}
 386
 387static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 388                             const struct ib_recv_wr **bad_wr)
 389{
 390        int err = 0;
 391        unsigned long flags;
 392        struct rxe_srq *srq = to_rsrq(ibsrq);
 393
 394        spin_lock_irqsave(&srq->rq.producer_lock, flags);
 395
 396        while (wr) {
 397                err = post_one_recv(&srq->rq, wr);
 398                if (unlikely(err))
 399                        break;
 400                wr = wr->next;
 401        }
 402
 403        spin_unlock_irqrestore(&srq->rq.producer_lock, flags);
 404
 405        if (err)
 406                *bad_wr = wr;
 407
 408        return err;
 409}
 410
 411static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
 412                                   struct ib_qp_init_attr *init,
 413                                   struct ib_udata *udata)
 414{
 415        int err;
 416        struct rxe_dev *rxe = to_rdev(ibpd->device);
 417        struct rxe_pd *pd = to_rpd(ibpd);
 418        struct rxe_qp *qp;
 419        struct rxe_create_qp_resp __user *uresp = NULL;
 420
 421        if (udata) {
 422                if (udata->outlen < sizeof(*uresp))
 423                        return ERR_PTR(-EINVAL);
 424                uresp = udata->outbuf;
 425        }
 426
 427        err = rxe_qp_chk_init(rxe, init);
 428        if (err)
 429                goto err1;
 430
 431        qp = rxe_alloc(&rxe->qp_pool);
 432        if (!qp) {
 433                err = -ENOMEM;
 434                goto err1;
 435        }
 436
 437        if (udata) {
 438                if (udata->inlen) {
 439                        err = -EINVAL;
 440                        goto err2;
 441                }
 442                qp->is_user = 1;
 443        }
 444
 445        rxe_add_index(qp);
 446
 447        err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata);
 448        if (err)
 449                goto err3;
 450
 451        return &qp->ibqp;
 452
 453err3:
 454        rxe_drop_index(qp);
 455err2:
 456        rxe_drop_ref(qp);
 457err1:
 458        return ERR_PTR(err);
 459}
 460
 461static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 462                         int mask, struct ib_udata *udata)
 463{
 464        int err;
 465        struct rxe_dev *rxe = to_rdev(ibqp->device);
 466        struct rxe_qp *qp = to_rqp(ibqp);
 467
 468        err = rxe_qp_chk_attr(rxe, qp, attr, mask);
 469        if (err)
 470                goto err1;
 471
 472        err = rxe_qp_from_attr(qp, attr, mask, udata);
 473        if (err)
 474                goto err1;
 475
 476        return 0;
 477
 478err1:
 479        return err;
 480}
 481
 482static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 483                        int mask, struct ib_qp_init_attr *init)
 484{
 485        struct rxe_qp *qp = to_rqp(ibqp);
 486
 487        rxe_qp_to_init(qp, init);
 488        rxe_qp_to_attr(qp, attr, mask);
 489
 490        return 0;
 491}
 492
 493static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 494{
 495        struct rxe_qp *qp = to_rqp(ibqp);
 496
 497        rxe_qp_destroy(qp);
 498        rxe_drop_index(qp);
 499        rxe_drop_ref(qp);
 500        return 0;
 501}
 502
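/* Sanity checks applied before a send WR is turned into a WQE: the
 * SGE count must fit the send queue, atomic operations must cover at
 * least 8 bytes at an 8-byte-aligned remote address, and inline data
 * must not exceed the queue's max_inline limit.
 */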
 503static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 504                            unsigned int mask, unsigned int length)
 505{
 506        int num_sge = ibwr->num_sge;
 507        struct rxe_sq *sq = &qp->sq;
 508
 509        if (unlikely(num_sge > sq->max_sge))
 510                goto err1;
 511
 512        if (unlikely(mask & WR_ATOMIC_MASK)) {
 513                if (length < 8)
 514                        goto err1;
 515
 516                if (atomic_wr(ibwr)->remote_addr & 0x7)
 517                        goto err1;
 518        }
 519
 520        if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
 521                     (length > sq->max_inline)))
 522                goto err1;
 523
 524        return 0;
 525
 526err1:
 527        return -EINVAL;
 528}
 529
 530static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
 531                         const struct ib_send_wr *ibwr)
 532{
 533        wr->wr_id = ibwr->wr_id;
 534        wr->num_sge = ibwr->num_sge;
 535        wr->opcode = ibwr->opcode;
 536        wr->send_flags = ibwr->send_flags;
 537
 538        if (qp_type(qp) == IB_QPT_UD ||
 539            qp_type(qp) == IB_QPT_SMI ||
 540            qp_type(qp) == IB_QPT_GSI) {
 541                wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
 542                wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
 543                if (qp_type(qp) == IB_QPT_GSI)
 544                        wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
 545                if (wr->opcode == IB_WR_SEND_WITH_IMM)
 546                        wr->ex.imm_data = ibwr->ex.imm_data;
 547        } else {
 548                switch (wr->opcode) {
 549                case IB_WR_RDMA_WRITE_WITH_IMM:
 550                        wr->ex.imm_data = ibwr->ex.imm_data;
 551                        /* fall through */
 552                case IB_WR_RDMA_READ:
 553                case IB_WR_RDMA_WRITE:
 554                        wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
 555                        wr->wr.rdma.rkey        = rdma_wr(ibwr)->rkey;
 556                        break;
 557                case IB_WR_SEND_WITH_IMM:
 558                        wr->ex.imm_data = ibwr->ex.imm_data;
 559                        break;
 560                case IB_WR_SEND_WITH_INV:
 561                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
 562                        break;
 563                case IB_WR_ATOMIC_CMP_AND_SWP:
 564                case IB_WR_ATOMIC_FETCH_AND_ADD:
 565                        wr->wr.atomic.remote_addr =
 566                                atomic_wr(ibwr)->remote_addr;
 567                        wr->wr.atomic.compare_add =
 568                                atomic_wr(ibwr)->compare_add;
 569                        wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
 570                        wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
 571                        break;
 572                case IB_WR_LOCAL_INV:
 573                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
  574                        break;
 575                case IB_WR_REG_MR:
 576                        wr->wr.reg.mr = reg_wr(ibwr)->mr;
 577                        wr->wr.reg.key = reg_wr(ibwr)->key;
 578                        wr->wr.reg.access = reg_wr(ibwr)->access;
  579                        break;
 580                default:
 581                        break;
 582                }
 583        }
 584}
 585
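/* Fill a send WQE from the ib_send_wr.  UD/SMI/GSI WQEs carry a copy
 * of the address vector taken from the WR's AH.  Inline sends copy
 * the payload into the WQE, fast-register WRs return early after
 * setting the mask and state, and everything else copies the SGE
 * list for later DMA processing.
 */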
 586static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 587                         unsigned int mask, unsigned int length,
 588                         struct rxe_send_wqe *wqe)
 589{
 590        int num_sge = ibwr->num_sge;
 591        struct ib_sge *sge;
 592        int i;
 593        u8 *p;
 594
 595        init_send_wr(qp, &wqe->wr, ibwr);
 596
 597        if (qp_type(qp) == IB_QPT_UD ||
 598            qp_type(qp) == IB_QPT_SMI ||
 599            qp_type(qp) == IB_QPT_GSI)
 600                memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));
 601
 602        if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
 603                p = wqe->dma.inline_data;
 604
 605                sge = ibwr->sg_list;
 606                for (i = 0; i < num_sge; i++, sge++) {
 607                        memcpy(p, (void *)(uintptr_t)sge->addr,
 608                                        sge->length);
 609
 610                        p += sge->length;
 611                }
 612        } else if (mask & WR_REG_MASK) {
 613                wqe->mask = mask;
 614                wqe->state = wqe_state_posted;
 615                return 0;
 616        } else
 617                memcpy(wqe->dma.sge, ibwr->sg_list,
 618                       num_sge * sizeof(struct ib_sge));
 619
 620        wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
 621                mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
 622        wqe->mask               = mask;
 623        wqe->dma.length         = length;
 624        wqe->dma.resid          = length;
 625        wqe->dma.num_sge        = num_sge;
 626        wqe->dma.cur_sge        = 0;
 627        wqe->dma.sge_offset     = 0;
 628        wqe->state              = wqe_state_posted;
 629        wqe->ssn                = atomic_add_return(1, &qp->ssn);
 630
 631        return 0;
 632}
 633
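/* Validate a send WR and, under the send queue lock, write it into
 * the producer slot.  As in post_one_recv(), the smp_wmb() makes
 * sure the WQE contents are visible before the producer index is
 * advanced.
 */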
 634static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 635                         unsigned int mask, u32 length)
 636{
 637        int err;
 638        struct rxe_sq *sq = &qp->sq;
 639        struct rxe_send_wqe *send_wqe;
 640        unsigned long flags;
 641
 642        err = validate_send_wr(qp, ibwr, mask, length);
 643        if (err)
 644                return err;
 645
 646        spin_lock_irqsave(&qp->sq.sq_lock, flags);
 647
 648        if (unlikely(queue_full(sq->queue))) {
 649                err = -ENOMEM;
 650                goto err1;
 651        }
 652
 653        send_wqe = producer_addr(sq->queue);
 654
 655        err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
 656        if (unlikely(err))
 657                goto err1;
 658
 659        /*
 660         * make sure all changes to the work queue are
 661         * written before we update the producer pointer
 662         */
 663        smp_wmb();
 664
 665        advance_producer(sq->queue);
 666        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 667
 668        return 0;
 669
 670err1:
 671        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 672        return err;
 673}
 674
 675static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
 676                                const struct ib_send_wr **bad_wr)
 677{
 678        int err = 0;
 679        unsigned int mask;
 680        unsigned int length = 0;
 681        int i;
 682
 683        while (wr) {
 684                mask = wr_opcode_mask(wr->opcode, qp);
 685                if (unlikely(!mask)) {
 686                        err = -EINVAL;
 687                        *bad_wr = wr;
 688                        break;
 689                }
 690
 691                if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
 692                             !(mask & WR_INLINE_MASK))) {
 693                        err = -EINVAL;
 694                        *bad_wr = wr;
 695                        break;
 696                }
 697
 698                length = 0;
 699                for (i = 0; i < wr->num_sge; i++)
 700                        length += wr->sg_list[i].length;
 701
 702                err = post_one_send(qp, wr, mask, length);
 703
 704                if (err) {
 705                        *bad_wr = wr;
 706                        break;
 707                }
 708                wr = wr->next;
 709        }
 710
 711        rxe_run_task(&qp->req.task, 1);
 712        if (unlikely(qp->req.state == QP_STATE_ERROR))
 713                rxe_run_task(&qp->comp.task, 1);
 714
 715        return err;
 716}
 717
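/* post_send verb.  For user QPs the send queue is shared with
 * userspace (see rxe_mmap()), so the WQEs have already been written
 * by the library and this call only schedules the requester task;
 * kernel WRs are copied into the queue by rxe_post_send_kernel().
 *
 * Illustrative caller-side sketch only (not part of this file): a
 * kernel ULP posting a signaled SEND through the core verb, which
 * dispatches here for an rxe QP.  dma_addr and len are placeholders
 * and the buffer setup is assumed to have been done elsewhere.
 *
 *	struct ib_sge sge = {
 *		.addr	= dma_addr,
 *		.length	= len,
 *		.lkey	= pd->local_dma_lkey,
 *	};
 *	struct ib_send_wr wr = {
 *		.wr_id		= 1,
 *		.sg_list	= &sge,
 *		.num_sge	= 1,
 *		.opcode		= IB_WR_SEND,
 *		.send_flags	= IB_SEND_SIGNALED,
 *	};
 *	const struct ib_send_wr *bad_wr;
 *	int ret = ib_post_send(qp, &wr, &bad_wr);
 */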
 718static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 719                         const struct ib_send_wr **bad_wr)
 720{
 721        struct rxe_qp *qp = to_rqp(ibqp);
 722
 723        if (unlikely(!qp->valid)) {
 724                *bad_wr = wr;
 725                return -EINVAL;
 726        }
 727
 728        if (unlikely(qp->req.state < QP_STATE_READY)) {
 729                *bad_wr = wr;
 730                return -EINVAL;
 731        }
 732
 733        if (qp->is_user) {
 734                /* Utilize process context to do protocol processing */
 735                rxe_run_task(&qp->req.task, 0);
 736                return 0;
 737        } else
 738                return rxe_post_send_kernel(qp, wr, bad_wr);
 739}
 740
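/* post_recv verb for QPs that own their receive queue.  WRs are
 * rejected with -EINVAL before the QP is initialized or when the QP
 * is attached to an SRQ (receives must then go through
 * rxe_post_srq_recv()).  Posting to a QP in the error state kicks
 * the responder task so the newly posted WQEs can be completed.
 */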
 741static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 742                         const struct ib_recv_wr **bad_wr)
 743{
 744        int err = 0;
 745        struct rxe_qp *qp = to_rqp(ibqp);
 746        struct rxe_rq *rq = &qp->rq;
 747        unsigned long flags;
 748
 749        if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
 750                *bad_wr = wr;
 751                err = -EINVAL;
 752                goto err1;
 753        }
 754
 755        if (unlikely(qp->srq)) {
 756                *bad_wr = wr;
 757                err = -EINVAL;
 758                goto err1;
 759        }
 760
 761        spin_lock_irqsave(&rq->producer_lock, flags);
 762
 763        while (wr) {
 764                err = post_one_recv(rq, wr);
 765                if (unlikely(err)) {
 766                        *bad_wr = wr;
 767                        break;
 768                }
 769                wr = wr->next;
 770        }
 771
 772        spin_unlock_irqrestore(&rq->producer_lock, flags);
 773
 774        if (qp->resp.state == QP_STATE_ERROR)
 775                rxe_run_task(&qp->resp.task, 1);
 776
 777err1:
 778        return err;
 779}
 780
 781static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 782                         struct ib_udata *udata)
 783{
 784        int err;
 785        struct ib_device *dev = ibcq->device;
 786        struct rxe_dev *rxe = to_rdev(dev);
 787        struct rxe_cq *cq = to_rcq(ibcq);
 788        struct rxe_create_cq_resp __user *uresp = NULL;
 789
 790        if (udata) {
 791                if (udata->outlen < sizeof(*uresp))
 792                        return -EINVAL;
 793                uresp = udata->outbuf;
 794        }
 795
 796        if (attr->flags)
 797                return -EINVAL;
 798
 799        err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
 800        if (err)
 801                return err;
 802
 803        err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
 804                               uresp);
 805        if (err)
 806                return err;
 807
 808        return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem);
 809}
 810
 811static void rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 812{
 813        struct rxe_cq *cq = to_rcq(ibcq);
 814
 815        rxe_cq_disable(cq);
 816
 817        rxe_drop_ref(cq);
 818}
 819
 820static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 821{
 822        int err;
 823        struct rxe_cq *cq = to_rcq(ibcq);
 824        struct rxe_dev *rxe = to_rdev(ibcq->device);
 825        struct rxe_resize_cq_resp __user *uresp = NULL;
 826
 827        if (udata) {
 828                if (udata->outlen < sizeof(*uresp))
 829                        return -EINVAL;
 830                uresp = udata->outbuf;
 831        }
 832
 833        err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
 834        if (err)
 835                goto err1;
 836
 837        err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
 838        if (err)
 839                goto err1;
 840
 841        return 0;
 842
 843err1:
 844        return err;
 845}
 846
 847static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 848{
 849        int i;
 850        struct rxe_cq *cq = to_rcq(ibcq);
 851        struct rxe_cqe *cqe;
 852        unsigned long flags;
 853
 854        spin_lock_irqsave(&cq->cq_lock, flags);
 855        for (i = 0; i < num_entries; i++) {
 856                cqe = queue_head(cq->queue);
 857                if (!cqe)
 858                        break;
 859
 860                memcpy(wc++, &cqe->ibwc, sizeof(*wc));
 861                advance_consumer(cq->queue);
 862        }
 863        spin_unlock_irqrestore(&cq->cq_lock, flags);
 864
 865        return i;
 866}
 867
 868static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
 869{
 870        struct rxe_cq *cq = to_rcq(ibcq);
 871        int count = queue_count(cq->queue);
 872
 873        return (count > wc_cnt) ? wc_cnt : count;
 874}
 875
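/* Arm the CQ for completion notification.  The request is ignored if
 * the CQ is already armed for every completion (IB_CQ_NEXT_COMP).
 * Returns 1 when IB_CQ_REPORT_MISSED_EVENTS is set and completions
 * are already queued, telling the caller to poll again instead of
 * waiting for an event.
 *
 * Typical consumer pattern, sketch only (handle() and the repoll
 * label are placeholders, not part of this file):
 *
 *	while (ib_poll_cq(cq, 1, &wc) > 0)
 *		handle(&wc);
 *	if (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
 *			     IB_CQ_REPORT_MISSED_EVENTS) > 0)
 *		goto repoll;
 */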
 876static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 877{
 878        struct rxe_cq *cq = to_rcq(ibcq);
 879        unsigned long irq_flags;
 880        int ret = 0;
 881
 882        spin_lock_irqsave(&cq->cq_lock, irq_flags);
 883        if (cq->notify != IB_CQ_NEXT_COMP)
 884                cq->notify = flags & IB_CQ_SOLICITED_MASK;
 885
 886        if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
 887                ret = 1;
 888
 889        spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
 890
 891        return ret;
 892}
 893
 894static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
 895{
 896        struct rxe_dev *rxe = to_rdev(ibpd->device);
 897        struct rxe_pd *pd = to_rpd(ibpd);
 898        struct rxe_mem *mr;
 899        int err;
 900
 901        mr = rxe_alloc(&rxe->mr_pool);
 902        if (!mr) {
 903                err = -ENOMEM;
 904                goto err1;
 905        }
 906
 907        rxe_add_index(mr);
 908
 909        rxe_add_ref(pd);
 910
 911        err = rxe_mem_init_dma(pd, access, mr);
 912        if (err)
 913                goto err2;
 914
 915        return &mr->ibmr;
 916
 917err2:
 918        rxe_drop_ref(pd);
 919        rxe_drop_index(mr);
 920        rxe_drop_ref(mr);
 921err1:
 922        return ERR_PTR(err);
 923}
 924
 925static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
 926                                     u64 start,
 927                                     u64 length,
 928                                     u64 iova,
 929                                     int access, struct ib_udata *udata)
 930{
 931        int err;
 932        struct rxe_dev *rxe = to_rdev(ibpd->device);
 933        struct rxe_pd *pd = to_rpd(ibpd);
 934        struct rxe_mem *mr;
 935
 936        mr = rxe_alloc(&rxe->mr_pool);
 937        if (!mr) {
 938                err = -ENOMEM;
 939                goto err2;
 940        }
 941
 942        rxe_add_index(mr);
 943
 944        rxe_add_ref(pd);
 945
 946        err = rxe_mem_init_user(pd, start, length, iova,
 947                                access, udata, mr);
 948        if (err)
 949                goto err3;
 950
 951        return &mr->ibmr;
 952
 953err3:
 954        rxe_drop_ref(pd);
 955        rxe_drop_index(mr);
 956        rxe_drop_ref(mr);
 957err2:
 958        return ERR_PTR(err);
 959}
 960
 961static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 962{
 963        struct rxe_mem *mr = to_rmr(ibmr);
 964
 965        mr->state = RXE_MEM_STATE_ZOMBIE;
 966        rxe_drop_ref(mr->pd);
 967        rxe_drop_index(mr);
 968        rxe_drop_ref(mr);
 969        return 0;
 970}
 971
 972static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 973                                  u32 max_num_sg, struct ib_udata *udata)
 974{
 975        struct rxe_dev *rxe = to_rdev(ibpd->device);
 976        struct rxe_pd *pd = to_rpd(ibpd);
 977        struct rxe_mem *mr;
 978        int err;
 979
 980        if (mr_type != IB_MR_TYPE_MEM_REG)
 981                return ERR_PTR(-EINVAL);
 982
 983        mr = rxe_alloc(&rxe->mr_pool);
 984        if (!mr) {
 985                err = -ENOMEM;
 986                goto err1;
 987        }
 988
 989        rxe_add_index(mr);
 990
 991        rxe_add_ref(pd);
 992
 993        err = rxe_mem_init_fast(pd, max_num_sg, mr);
 994        if (err)
 995                goto err2;
 996
 997        return &mr->ibmr;
 998
 999err2:
1000        rxe_drop_ref(pd);
1001        rxe_drop_index(mr);
1002        rxe_drop_ref(mr);
1003err1:
1004        return ERR_PTR(err);
1005}
1006
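/* Callback handed to ib_sg_to_pages() by rxe_map_mr_sg() below: it
 * records one page-sized buffer per call in the MR's map tables and
 * fails with -ENOMEM once all num_buf slots are used.
 */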
1007static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
1008{
1009        struct rxe_mem *mr = to_rmr(ibmr);
1010        struct rxe_map *map;
1011        struct rxe_phys_buf *buf;
1012
1013        if (unlikely(mr->nbuf == mr->num_buf))
1014                return -ENOMEM;
1015
1016        map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
1017        buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
1018
1019        buf->addr = addr;
1020        buf->size = ibmr->page_size;
1021        mr->nbuf++;
1022
1023        return 0;
1024}
1025
1026static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
1027                         int sg_nents, unsigned int *sg_offset)
1028{
1029        struct rxe_mem *mr = to_rmr(ibmr);
1030        int n;
1031
1032        mr->nbuf = 0;
1033
1034        n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);
1035
1036        mr->va = ibmr->iova;
1037        mr->iova = ibmr->iova;
1038        mr->length = ibmr->length;
1039        mr->page_shift = ilog2(ibmr->page_size);
1040        mr->page_mask = ibmr->page_size - 1;
1041        mr->offset = mr->iova & mr->page_mask;
1042
1043        return n;
1044}
1045
1046static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
1047{
1048        int err;
1049        struct rxe_dev *rxe = to_rdev(ibqp->device);
1050        struct rxe_qp *qp = to_rqp(ibqp);
1051        struct rxe_mc_grp *grp;
1052
1053        /* takes a ref on grp if successful */
1054        err = rxe_mcast_get_grp(rxe, mgid, &grp);
1055        if (err)
1056                return err;
1057
1058        err = rxe_mcast_add_grp_elem(rxe, qp, grp);
1059
1060        rxe_drop_ref(grp);
1061        return err;
1062}
1063
1064static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
1065{
1066        struct rxe_dev *rxe = to_rdev(ibqp->device);
1067        struct rxe_qp *qp = to_rqp(ibqp);
1068
1069        return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
1070}
1071
1072static ssize_t parent_show(struct device *device,
1073                           struct device_attribute *attr, char *buf)
1074{
1075        struct rxe_dev *rxe =
1076                rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);
1077
1078        return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
1079}
1080
1081static DEVICE_ATTR_RO(parent);
1082
1083static struct attribute *rxe_dev_attributes[] = {
1084        &dev_attr_parent.attr,
1085        NULL
1086};
1087
1088static const struct attribute_group rxe_attr_group = {
1089        .attrs = rxe_dev_attributes,
1090};
1091
1092static int rxe_enable_driver(struct ib_device *ib_dev)
1093{
1094        struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
1095
1096        rxe_set_port_state(rxe);
1097        dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
1098        return 0;
1099}
1100
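/* Verbs entry points handed to the RDMA core via ib_set_device_ops().
 * The INIT_RDMA_OBJ_SIZE() entries tell the core how large the rxe
 * container structures are so it can allocate the AH, CQ, PD, SRQ and
 * ucontext objects on behalf of the driver.
 */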
1101static const struct ib_device_ops rxe_dev_ops = {
1102        .owner = THIS_MODULE,
1103        .driver_id = RDMA_DRIVER_RXE,
1104        .uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,
1105
1106        .alloc_hw_stats = rxe_ib_alloc_hw_stats,
1107        .alloc_mr = rxe_alloc_mr,
1108        .alloc_pd = rxe_alloc_pd,
1109        .alloc_ucontext = rxe_alloc_ucontext,
1110        .attach_mcast = rxe_attach_mcast,
1111        .create_ah = rxe_create_ah,
1112        .create_cq = rxe_create_cq,
1113        .create_qp = rxe_create_qp,
1114        .create_srq = rxe_create_srq,
1115        .dealloc_driver = rxe_dealloc,
1116        .dealloc_pd = rxe_dealloc_pd,
1117        .dealloc_ucontext = rxe_dealloc_ucontext,
1118        .dereg_mr = rxe_dereg_mr,
1119        .destroy_ah = rxe_destroy_ah,
1120        .destroy_cq = rxe_destroy_cq,
1121        .destroy_qp = rxe_destroy_qp,
1122        .destroy_srq = rxe_destroy_srq,
1123        .detach_mcast = rxe_detach_mcast,
1124        .enable_driver = rxe_enable_driver,
1125        .get_dma_mr = rxe_get_dma_mr,
1126        .get_hw_stats = rxe_ib_get_hw_stats,
1127        .get_link_layer = rxe_get_link_layer,
1128        .get_port_immutable = rxe_port_immutable,
1129        .map_mr_sg = rxe_map_mr_sg,
1130        .mmap = rxe_mmap,
1131        .modify_ah = rxe_modify_ah,
1132        .modify_device = rxe_modify_device,
1133        .modify_port = rxe_modify_port,
1134        .modify_qp = rxe_modify_qp,
1135        .modify_srq = rxe_modify_srq,
1136        .peek_cq = rxe_peek_cq,
1137        .poll_cq = rxe_poll_cq,
1138        .post_recv = rxe_post_recv,
1139        .post_send = rxe_post_send,
1140        .post_srq_recv = rxe_post_srq_recv,
1141        .query_ah = rxe_query_ah,
1142        .query_device = rxe_query_device,
1143        .query_pkey = rxe_query_pkey,
1144        .query_port = rxe_query_port,
1145        .query_qp = rxe_query_qp,
1146        .query_srq = rxe_query_srq,
1147        .reg_user_mr = rxe_reg_user_mr,
1148        .req_notify_cq = rxe_req_notify_cq,
1149        .resize_cq = rxe_resize_cq,
1150
1151        INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
1152        INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
1153        INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
1154        INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
1155        INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
1156};
1157
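/* Final device setup: fill in the ib_device fields (node GUID derived
 * from the netdev MAC, virtual DMA ops, supported uverbs commands),
 * bind the netdev, allocate the crc32 shash used for ICRC
 * computation, and register the device with the RDMA core.
 */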
1158int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
1159{
1160        int err;
1161        struct ib_device *dev = &rxe->ib_dev;
1162        struct crypto_shash *tfm;
1163
1164        strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
1165
1166        dev->node_type = RDMA_NODE_IB_CA;
1167        dev->phys_port_cnt = 1;
1168        dev->num_comp_vectors = num_possible_cpus();
1169        dev->dev.parent = rxe_dma_device(rxe);
1170        dev->local_dma_lkey = 0;
1171        addrconf_addr_eui48((unsigned char *)&dev->node_guid,
1172                            rxe->ndev->dev_addr);
1173        dev->dev.dma_ops = &dma_virt_ops;
1174        dma_coerce_mask_and_coherent(&dev->dev,
1175                                     dma_get_required_mask(&dev->dev));
1176
1177        dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
1178            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
1179            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
1180            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
1181            | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
1182            | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
1183            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
1184            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
1185            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
1186            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
1187            | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
1188            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
1189            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
1190            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
1191            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
1192            | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
1193            | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
1194            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
1195            | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
1196            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
1197            | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
1198            | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
1199            | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
1200            | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
1201            | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
1202            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
1203            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
1204            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
1205            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
1206            | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
1207            | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
1208            ;
1209
1210        ib_set_device_ops(dev, &rxe_dev_ops);
1211        err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
1212        if (err)
1213                return err;
1214
1215        tfm = crypto_alloc_shash("crc32", 0, 0);
1216        if (IS_ERR(tfm)) {
1217                pr_err("failed to allocate crc algorithm err:%ld\n",
1218                       PTR_ERR(tfm));
1219                return PTR_ERR(tfm);
1220        }
1221        rxe->tfm = tfm;
1222
1223        rdma_set_device_sysfs_group(dev, &rxe_attr_group);
1224        err = ib_register_device(dev, ibdev_name);
1225        if (err)
1226                pr_warn("%s failed with error %d\n", __func__, err);
1227
1228        /*
1229         * Note that rxe may be invalid at this point if another thread
1230         * unregistered it.
1231         */
1232        return err;
1233}
1234