linux/drivers/infiniband/sw/rxe/rxe_verbs.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
   3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/dma-mapping.h>
  35#include <net/addrconf.h>
  36#include "rxe.h"
  37#include "rxe_loc.h"
  38#include "rxe_queue.h"
  39#include "rxe_hw_counters.h"
  40
  41static int rxe_query_device(struct ib_device *dev,
  42                            struct ib_device_attr *attr,
  43                            struct ib_udata *uhw)
  44{
  45        struct rxe_dev *rxe = to_rdev(dev);
  46
  47        if (uhw->inlen || uhw->outlen)
  48                return -EINVAL;
  49
  50        *attr = rxe->attr;
  51        return 0;
  52}
  53
  54static int rxe_query_port(struct ib_device *dev,
  55                          u8 port_num, struct ib_port_attr *attr)
  56{
  57        struct rxe_dev *rxe = to_rdev(dev);
  58        struct rxe_port *port;
  59        int rc = -EINVAL;
  60
  61        if (unlikely(port_num != 1)) {
  62                pr_warn("invalid port_number %d\n", port_num);
  63                goto out;
  64        }
  65
  66        port = &rxe->port;
  67
  68        /* *attr being zeroed by the caller, avoid zeroing it here */
  69        *attr = port->attr;
  70
  71        mutex_lock(&rxe->usdev_lock);
  72        rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
  73                              &attr->active_width);
  74        mutex_unlock(&rxe->usdev_lock);
  75
  76out:
  77        return rc;
  78}
  79
  80static struct net_device *rxe_get_netdev(struct ib_device *device,
  81                                         u8 port_num)
  82{
  83        struct rxe_dev *rxe = to_rdev(device);
  84
  85        if (rxe->ndev) {
  86                dev_hold(rxe->ndev);
  87                return rxe->ndev;
  88        }
  89
  90        return NULL;
  91}
  92
  93static int rxe_query_pkey(struct ib_device *device,
  94                          u8 port_num, u16 index, u16 *pkey)
  95{
  96        struct rxe_dev *rxe = to_rdev(device);
  97        struct rxe_port *port;
  98
  99        if (unlikely(port_num != 1)) {
 100                dev_warn(device->dev.parent, "invalid port_num = %d\n",
 101                         port_num);
 102                goto err1;
 103        }
 104
 105        port = &rxe->port;
 106
 107        if (unlikely(index >= port->attr.pkey_tbl_len)) {
 108                dev_warn(device->dev.parent, "invalid index = %d\n",
 109                         index);
 110                goto err1;
 111        }
 112
 113        *pkey = port->pkey_tbl[index];
 114        return 0;
 115
 116err1:
 117        return -EINVAL;
 118}
 119
 120static int rxe_modify_device(struct ib_device *dev,
 121                             int mask, struct ib_device_modify *attr)
 122{
 123        struct rxe_dev *rxe = to_rdev(dev);
 124
 125        if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
 126                rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);
 127
 128        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
 129                memcpy(rxe->ib_dev.node_desc,
 130                       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
 131        }
 132
 133        return 0;
 134}
 135
 136static int rxe_modify_port(struct ib_device *dev,
 137                           u8 port_num, int mask, struct ib_port_modify *attr)
 138{
 139        struct rxe_dev *rxe = to_rdev(dev);
 140        struct rxe_port *port;
 141
 142        if (unlikely(port_num != 1)) {
 143                pr_warn("invalid port_num = %d\n", port_num);
 144                goto err1;
 145        }
 146
 147        port = &rxe->port;
 148
 149        port->attr.port_cap_flags |= attr->set_port_cap_mask;
 150        port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;
 151
 152        if (mask & IB_PORT_RESET_QKEY_CNTR)
 153                port->attr.qkey_viol_cntr = 0;
 154
 155        return 0;
 156
 157err1:
 158        return -EINVAL;
 159}
 160
 161static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
 162                                               u8 port_num)
 163{
 164        struct rxe_dev *rxe = to_rdev(dev);
 165
 166        return rxe_link_layer(rxe, port_num);
 167}
 168
 169static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
 170                                              struct ib_udata *udata)
 171{
 172        struct rxe_dev *rxe = to_rdev(dev);
 173        struct rxe_ucontext *uc;
 174
 175        uc = rxe_alloc(&rxe->uc_pool);
 176        return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
 177}
 178
 179static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
 180{
 181        struct rxe_ucontext *uc = to_ruc(ibuc);
 182
 183        rxe_drop_ref(uc);
 184        return 0;
 185}
 186
 187static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
 188                              struct ib_port_immutable *immutable)
 189{
 190        int err;
 191        struct ib_port_attr attr;
 192
 193        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 194
 195        err = ib_query_port(dev, port_num, &attr);
 196        if (err)
 197                return err;
 198
 199        immutable->pkey_tbl_len = attr.pkey_tbl_len;
 200        immutable->gid_tbl_len = attr.gid_tbl_len;
 201        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 202
 203        return 0;
 204}
 205
 206static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
 207                                  struct ib_ucontext *context,
 208                                  struct ib_udata *udata)
 209{
 210        struct rxe_dev *rxe = to_rdev(dev);
 211        struct rxe_pd *pd;
 212
 213        pd = rxe_alloc(&rxe->pd_pool);
 214        return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
 215}
 216
 217static int rxe_dealloc_pd(struct ib_pd *ibpd)
 218{
 219        struct rxe_pd *pd = to_rpd(ibpd);
 220
 221        rxe_drop_ref(pd);
 222        return 0;
 223}
 224
 225static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
 226                        struct rxe_av *av)
 227{
 228        rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
 229        rxe_av_fill_ip_info(av, attr);
 230}
 231
 232static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
 233                                   struct rdma_ah_attr *attr,
 234                                   struct ib_udata *udata)
 235
 236{
 237        int err;
 238        struct rxe_dev *rxe = to_rdev(ibpd->device);
 239        struct rxe_pd *pd = to_rpd(ibpd);
 240        struct rxe_ah *ah;
 241
 242        err = rxe_av_chk_attr(rxe, attr);
 243        if (err)
 244                return ERR_PTR(err);
 245
 246        ah = rxe_alloc(&rxe->ah_pool);
 247        if (!ah)
 248                return ERR_PTR(-ENOMEM);
 249
 250        rxe_add_ref(pd);
 251        ah->pd = pd;
 252
 253        rxe_init_av(rxe, attr, &ah->av);
 254        return &ah->ibah;
 255}
 256
 257static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 258{
 259        int err;
 260        struct rxe_dev *rxe = to_rdev(ibah->device);
 261        struct rxe_ah *ah = to_rah(ibah);
 262
 263        err = rxe_av_chk_attr(rxe, attr);
 264        if (err)
 265                return err;
 266
 267        rxe_init_av(rxe, attr, &ah->av);
 268        return 0;
 269}
 270
 271static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 272{
 273        struct rxe_ah *ah = to_rah(ibah);
 274
 275        memset(attr, 0, sizeof(*attr));
 276        attr->type = ibah->type;
 277        rxe_av_to_attr(&ah->av, attr);
 278        return 0;
 279}
 280
 281static int rxe_destroy_ah(struct ib_ah *ibah)
 282{
 283        struct rxe_ah *ah = to_rah(ibah);
 284
 285        rxe_drop_ref(ah->pd);
 286        rxe_drop_ref(ah);
 287        return 0;
 288}
 289
 290static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
 291{
 292        int err;
 293        int i;
 294        u32 length;
 295        struct rxe_recv_wqe *recv_wqe;
 296        int num_sge = ibwr->num_sge;
 297
 298        if (unlikely(queue_full(rq->queue))) {
 299                err = -ENOMEM;
 300                goto err1;
 301        }
 302
 303        if (unlikely(num_sge > rq->max_sge)) {
 304                err = -EINVAL;
 305                goto err1;
 306        }
 307
 308        length = 0;
 309        for (i = 0; i < num_sge; i++)
 310                length += ibwr->sg_list[i].length;
 311
 312        recv_wqe = producer_addr(rq->queue);
 313        recv_wqe->wr_id = ibwr->wr_id;
 314        recv_wqe->num_sge = num_sge;
 315
 316        memcpy(recv_wqe->dma.sge, ibwr->sg_list,
 317               num_sge * sizeof(struct ib_sge));
 318
 319        recv_wqe->dma.length            = length;
 320        recv_wqe->dma.resid             = length;
 321        recv_wqe->dma.num_sge           = num_sge;
 322        recv_wqe->dma.cur_sge           = 0;
 323        recv_wqe->dma.sge_offset        = 0;
 324
 325        /* make sure all changes to the work queue are written before we
 326         * update the producer pointer
 327         */
 328        smp_wmb();
 329
 330        advance_producer(rq->queue);
 331        return 0;
 332
 333err1:
 334        return err;
 335}
 336
 337static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
 338                                     struct ib_srq_init_attr *init,
 339                                     struct ib_udata *udata)
 340{
 341        int err;
 342        struct rxe_dev *rxe = to_rdev(ibpd->device);
 343        struct rxe_pd *pd = to_rpd(ibpd);
 344        struct rxe_srq *srq;
 345        struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
 346        struct rxe_create_srq_resp __user *uresp = NULL;
 347
 348        if (udata) {
 349                if (udata->outlen < sizeof(*uresp))
 350                        return ERR_PTR(-EINVAL);
 351                uresp = udata->outbuf;
 352        }
 353
 354        err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
 355        if (err)
 356                goto err1;
 357
 358        srq = rxe_alloc(&rxe->srq_pool);
 359        if (!srq) {
 360                err = -ENOMEM;
 361                goto err1;
 362        }
 363
 364        rxe_add_index(srq);
 365        rxe_add_ref(pd);
 366        srq->pd = pd;
 367
 368        err = rxe_srq_from_init(rxe, srq, init, context, uresp);
 369        if (err)
 370                goto err2;
 371
 372        return &srq->ibsrq;
 373
 374err2:
 375        rxe_drop_ref(pd);
 376        rxe_drop_index(srq);
 377        rxe_drop_ref(srq);
 378err1:
 379        return ERR_PTR(err);
 380}
 381
 382static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 383                          enum ib_srq_attr_mask mask,
 384                          struct ib_udata *udata)
 385{
 386        int err;
 387        struct rxe_srq *srq = to_rsrq(ibsrq);
 388        struct rxe_dev *rxe = to_rdev(ibsrq->device);
 389        struct rxe_modify_srq_cmd ucmd = {};
 390
 391        if (udata) {
 392                if (udata->inlen < sizeof(ucmd))
 393                        return -EINVAL;
 394
 395                err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
 396                if (err)
 397                        return err;
 398        }
 399
 400        err = rxe_srq_chk_attr(rxe, srq, attr, mask);
 401        if (err)
 402                goto err1;
 403
 404        err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd);
 405        if (err)
 406                goto err1;
 407
 408        return 0;
 409
 410err1:
 411        return err;
 412}
 413
 414static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
 415{
 416        struct rxe_srq *srq = to_rsrq(ibsrq);
 417
 418        if (srq->error)
 419                return -EINVAL;
 420
 421        attr->max_wr = srq->rq.queue->buf->index_mask;
 422        attr->max_sge = srq->rq.max_sge;
 423        attr->srq_limit = srq->limit;
 424        return 0;
 425}
 426
 427static int rxe_destroy_srq(struct ib_srq *ibsrq)
 428{
 429        struct rxe_srq *srq = to_rsrq(ibsrq);
 430
 431        if (srq->rq.queue)
 432                rxe_queue_cleanup(srq->rq.queue);
 433
 434        rxe_drop_ref(srq->pd);
 435        rxe_drop_index(srq);
 436        rxe_drop_ref(srq);
 437
 438        return 0;
 439}
 440
 441static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 442                             const struct ib_recv_wr **bad_wr)
 443{
 444        int err = 0;
 445        unsigned long flags;
 446        struct rxe_srq *srq = to_rsrq(ibsrq);
 447
 448        spin_lock_irqsave(&srq->rq.producer_lock, flags);
 449
 450        while (wr) {
 451                err = post_one_recv(&srq->rq, wr);
 452                if (unlikely(err))
 453                        break;
 454                wr = wr->next;
 455        }
 456
 457        spin_unlock_irqrestore(&srq->rq.producer_lock, flags);
 458
 459        if (err)
 460                *bad_wr = wr;
 461
 462        return err;
 463}
 464
 465static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
 466                                   struct ib_qp_init_attr *init,
 467                                   struct ib_udata *udata)
 468{
 469        int err;
 470        struct rxe_dev *rxe = to_rdev(ibpd->device);
 471        struct rxe_pd *pd = to_rpd(ibpd);
 472        struct rxe_qp *qp;
 473        struct rxe_create_qp_resp __user *uresp = NULL;
 474
 475        if (udata) {
 476                if (udata->outlen < sizeof(*uresp))
 477                        return ERR_PTR(-EINVAL);
 478                uresp = udata->outbuf;
 479        }
 480
 481        err = rxe_qp_chk_init(rxe, init);
 482        if (err)
 483                goto err1;
 484
 485        qp = rxe_alloc(&rxe->qp_pool);
 486        if (!qp) {
 487                err = -ENOMEM;
 488                goto err1;
 489        }
 490
 491        if (udata) {
 492                if (udata->inlen) {
 493                        err = -EINVAL;
 494                        goto err2;
 495                }
 496                qp->is_user = 1;
 497        }
 498
 499        rxe_add_index(qp);
 500
 501        err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd);
 502        if (err)
 503                goto err3;
 504
 505        return &qp->ibqp;
 506
 507err3:
 508        rxe_drop_index(qp);
 509err2:
 510        rxe_drop_ref(qp);
 511err1:
 512        return ERR_PTR(err);
 513}
 514
 515static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 516                         int mask, struct ib_udata *udata)
 517{
 518        int err;
 519        struct rxe_dev *rxe = to_rdev(ibqp->device);
 520        struct rxe_qp *qp = to_rqp(ibqp);
 521
 522        err = rxe_qp_chk_attr(rxe, qp, attr, mask);
 523        if (err)
 524                goto err1;
 525
 526        err = rxe_qp_from_attr(qp, attr, mask, udata);
 527        if (err)
 528                goto err1;
 529
 530        return 0;
 531
 532err1:
 533        return err;
 534}
 535
 536static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 537                        int mask, struct ib_qp_init_attr *init)
 538{
 539        struct rxe_qp *qp = to_rqp(ibqp);
 540
 541        rxe_qp_to_init(qp, init);
 542        rxe_qp_to_attr(qp, attr, mask);
 543
 544        return 0;
 545}
 546
 547static int rxe_destroy_qp(struct ib_qp *ibqp)
 548{
 549        struct rxe_qp *qp = to_rqp(ibqp);
 550
 551        rxe_qp_destroy(qp);
 552        rxe_drop_index(qp);
 553        rxe_drop_ref(qp);
 554        return 0;
 555}
 556
 557static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 558                            unsigned int mask, unsigned int length)
 559{
 560        int num_sge = ibwr->num_sge;
 561        struct rxe_sq *sq = &qp->sq;
 562
 563        if (unlikely(num_sge > sq->max_sge))
 564                goto err1;
 565
 566        if (unlikely(mask & WR_ATOMIC_MASK)) {
 567                if (length < 8)
 568                        goto err1;
 569
 570                if (atomic_wr(ibwr)->remote_addr & 0x7)
 571                        goto err1;
 572        }
 573
 574        if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
 575                     (length > sq->max_inline)))
 576                goto err1;
 577
 578        return 0;
 579
 580err1:
 581        return -EINVAL;
 582}
 583
 584static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
 585                         const struct ib_send_wr *ibwr)
 586{
 587        wr->wr_id = ibwr->wr_id;
 588        wr->num_sge = ibwr->num_sge;
 589        wr->opcode = ibwr->opcode;
 590        wr->send_flags = ibwr->send_flags;
 591
 592        if (qp_type(qp) == IB_QPT_UD ||
 593            qp_type(qp) == IB_QPT_SMI ||
 594            qp_type(qp) == IB_QPT_GSI) {
 595                wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
 596                wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
 597                if (qp_type(qp) == IB_QPT_GSI)
 598                        wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
 599                if (wr->opcode == IB_WR_SEND_WITH_IMM)
 600                        wr->ex.imm_data = ibwr->ex.imm_data;
 601        } else {
 602                switch (wr->opcode) {
 603                case IB_WR_RDMA_WRITE_WITH_IMM:
 604                        wr->ex.imm_data = ibwr->ex.imm_data;
 605                        /* fall through */
 606                case IB_WR_RDMA_READ:
 607                case IB_WR_RDMA_WRITE:
 608                        wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
 609                        wr->wr.rdma.rkey        = rdma_wr(ibwr)->rkey;
 610                        break;
 611                case IB_WR_SEND_WITH_IMM:
 612                        wr->ex.imm_data = ibwr->ex.imm_data;
 613                        break;
 614                case IB_WR_SEND_WITH_INV:
 615                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
 616                        break;
 617                case IB_WR_ATOMIC_CMP_AND_SWP:
 618                case IB_WR_ATOMIC_FETCH_AND_ADD:
 619                        wr->wr.atomic.remote_addr =
 620                                atomic_wr(ibwr)->remote_addr;
 621                        wr->wr.atomic.compare_add =
 622                                atomic_wr(ibwr)->compare_add;
 623                        wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
 624                        wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
 625                        break;
 626                case IB_WR_LOCAL_INV:
 627                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
 628                break;
 629                case IB_WR_REG_MR:
 630                        wr->wr.reg.mr = reg_wr(ibwr)->mr;
 631                        wr->wr.reg.key = reg_wr(ibwr)->key;
 632                        wr->wr.reg.access = reg_wr(ibwr)->access;
 633                break;
 634                default:
 635                        break;
 636                }
 637        }
 638}
 639
 640static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 641                         unsigned int mask, unsigned int length,
 642                         struct rxe_send_wqe *wqe)
 643{
 644        int num_sge = ibwr->num_sge;
 645        struct ib_sge *sge;
 646        int i;
 647        u8 *p;
 648
 649        init_send_wr(qp, &wqe->wr, ibwr);
 650
 651        if (qp_type(qp) == IB_QPT_UD ||
 652            qp_type(qp) == IB_QPT_SMI ||
 653            qp_type(qp) == IB_QPT_GSI)
 654                memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));
 655
 656        if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
 657                p = wqe->dma.inline_data;
 658
 659                sge = ibwr->sg_list;
 660                for (i = 0; i < num_sge; i++, sge++) {
 661                        memcpy(p, (void *)(uintptr_t)sge->addr,
 662                                        sge->length);
 663
 664                        p += sge->length;
 665                }
 666        } else if (mask & WR_REG_MASK) {
 667                wqe->mask = mask;
 668                wqe->state = wqe_state_posted;
 669                return 0;
 670        } else
 671                memcpy(wqe->dma.sge, ibwr->sg_list,
 672                       num_sge * sizeof(struct ib_sge));
 673
 674        wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
 675                mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
 676        wqe->mask               = mask;
 677        wqe->dma.length         = length;
 678        wqe->dma.resid          = length;
 679        wqe->dma.num_sge        = num_sge;
 680        wqe->dma.cur_sge        = 0;
 681        wqe->dma.sge_offset     = 0;
 682        wqe->state              = wqe_state_posted;
 683        wqe->ssn                = atomic_add_return(1, &qp->ssn);
 684
 685        return 0;
 686}
 687
 688static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 689                         unsigned int mask, u32 length)
 690{
 691        int err;
 692        struct rxe_sq *sq = &qp->sq;
 693        struct rxe_send_wqe *send_wqe;
 694        unsigned long flags;
 695
 696        err = validate_send_wr(qp, ibwr, mask, length);
 697        if (err)
 698                return err;
 699
 700        spin_lock_irqsave(&qp->sq.sq_lock, flags);
 701
 702        if (unlikely(queue_full(sq->queue))) {
 703                err = -ENOMEM;
 704                goto err1;
 705        }
 706
 707        send_wqe = producer_addr(sq->queue);
 708
 709        err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
 710        if (unlikely(err))
 711                goto err1;
 712
 713        /*
 714         * make sure all changes to the work queue are
 715         * written before we update the producer pointer
 716         */
 717        smp_wmb();
 718
 719        advance_producer(sq->queue);
 720        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 721
 722        return 0;
 723
 724err1:
 725        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 726        return err;
 727}
 728
 729static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
 730                                const struct ib_send_wr **bad_wr)
 731{
 732        int err = 0;
 733        unsigned int mask;
 734        unsigned int length = 0;
 735        int i;
 736
 737        while (wr) {
 738                mask = wr_opcode_mask(wr->opcode, qp);
 739                if (unlikely(!mask)) {
 740                        err = -EINVAL;
 741                        *bad_wr = wr;
 742                        break;
 743                }
 744
 745                if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
 746                             !(mask & WR_INLINE_MASK))) {
 747                        err = -EINVAL;
 748                        *bad_wr = wr;
 749                        break;
 750                }
 751
 752                length = 0;
 753                for (i = 0; i < wr->num_sge; i++)
 754                        length += wr->sg_list[i].length;
 755
 756                err = post_one_send(qp, wr, mask, length);
 757
 758                if (err) {
 759                        *bad_wr = wr;
 760                        break;
 761                }
 762                wr = wr->next;
 763        }
 764
 765        rxe_run_task(&qp->req.task, 1);
 766        if (unlikely(qp->req.state == QP_STATE_ERROR))
 767                rxe_run_task(&qp->comp.task, 1);
 768
 769        return err;
 770}
 771
 772static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 773                         const struct ib_send_wr **bad_wr)
 774{
 775        struct rxe_qp *qp = to_rqp(ibqp);
 776
 777        if (unlikely(!qp->valid)) {
 778                *bad_wr = wr;
 779                return -EINVAL;
 780        }
 781
 782        if (unlikely(qp->req.state < QP_STATE_READY)) {
 783                *bad_wr = wr;
 784                return -EINVAL;
 785        }
 786
 787        if (qp->is_user) {
 788                /* Utilize process context to do protocol processing */
 789                rxe_run_task(&qp->req.task, 0);
 790                return 0;
 791        } else
 792                return rxe_post_send_kernel(qp, wr, bad_wr);
 793}
 794
 795static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 796                         const struct ib_recv_wr **bad_wr)
 797{
 798        int err = 0;
 799        struct rxe_qp *qp = to_rqp(ibqp);
 800        struct rxe_rq *rq = &qp->rq;
 801        unsigned long flags;
 802
 803        if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
 804                *bad_wr = wr;
 805                err = -EINVAL;
 806                goto err1;
 807        }
 808
 809        if (unlikely(qp->srq)) {
 810                *bad_wr = wr;
 811                err = -EINVAL;
 812                goto err1;
 813        }
 814
 815        spin_lock_irqsave(&rq->producer_lock, flags);
 816
 817        while (wr) {
 818                err = post_one_recv(rq, wr);
 819                if (unlikely(err)) {
 820                        *bad_wr = wr;
 821                        break;
 822                }
 823                wr = wr->next;
 824        }
 825
 826        spin_unlock_irqrestore(&rq->producer_lock, flags);
 827
 828        if (qp->resp.state == QP_STATE_ERROR)
 829                rxe_run_task(&qp->resp.task, 1);
 830
 831err1:
 832        return err;
 833}
 834
 835static struct ib_cq *rxe_create_cq(struct ib_device *dev,
 836                                   const struct ib_cq_init_attr *attr,
 837                                   struct ib_ucontext *context,
 838                                   struct ib_udata *udata)
 839{
 840        int err;
 841        struct rxe_dev *rxe = to_rdev(dev);
 842        struct rxe_cq *cq;
 843        struct rxe_create_cq_resp __user *uresp = NULL;
 844
 845        if (udata) {
 846                if (udata->outlen < sizeof(*uresp))
 847                        return ERR_PTR(-EINVAL);
 848                uresp = udata->outbuf;
 849        }
 850
 851        if (attr->flags)
 852                return ERR_PTR(-EINVAL);
 853
 854        err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
 855        if (err)
 856                goto err1;
 857
 858        cq = rxe_alloc(&rxe->cq_pool);
 859        if (!cq) {
 860                err = -ENOMEM;
 861                goto err1;
 862        }
 863
 864        err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
 865                               context, uresp);
 866        if (err)
 867                goto err2;
 868
 869        return &cq->ibcq;
 870
 871err2:
 872        rxe_drop_ref(cq);
 873err1:
 874        return ERR_PTR(err);
 875}
 876
 877static int rxe_destroy_cq(struct ib_cq *ibcq)
 878{
 879        struct rxe_cq *cq = to_rcq(ibcq);
 880
 881        rxe_cq_disable(cq);
 882
 883        rxe_drop_ref(cq);
 884        return 0;
 885}
 886
 887static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 888{
 889        int err;
 890        struct rxe_cq *cq = to_rcq(ibcq);
 891        struct rxe_dev *rxe = to_rdev(ibcq->device);
 892        struct rxe_resize_cq_resp __user *uresp = NULL;
 893
 894        if (udata) {
 895                if (udata->outlen < sizeof(*uresp))
 896                        return -EINVAL;
 897                uresp = udata->outbuf;
 898        }
 899
 900        err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
 901        if (err)
 902                goto err1;
 903
 904        err = rxe_cq_resize_queue(cq, cqe, uresp);
 905        if (err)
 906                goto err1;
 907
 908        return 0;
 909
 910err1:
 911        return err;
 912}
 913
 914static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 915{
 916        int i;
 917        struct rxe_cq *cq = to_rcq(ibcq);
 918        struct rxe_cqe *cqe;
 919        unsigned long flags;
 920
 921        spin_lock_irqsave(&cq->cq_lock, flags);
 922        for (i = 0; i < num_entries; i++) {
 923                cqe = queue_head(cq->queue);
 924                if (!cqe)
 925                        break;
 926
 927                memcpy(wc++, &cqe->ibwc, sizeof(*wc));
 928                advance_consumer(cq->queue);
 929        }
 930        spin_unlock_irqrestore(&cq->cq_lock, flags);
 931
 932        return i;
 933}
 934
 935static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
 936{
 937        struct rxe_cq *cq = to_rcq(ibcq);
 938        int count = queue_count(cq->queue);
 939
 940        return (count > wc_cnt) ? wc_cnt : count;
 941}
 942
 943static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 944{
 945        struct rxe_cq *cq = to_rcq(ibcq);
 946        unsigned long irq_flags;
 947        int ret = 0;
 948
 949        spin_lock_irqsave(&cq->cq_lock, irq_flags);
 950        if (cq->notify != IB_CQ_NEXT_COMP)
 951                cq->notify = flags & IB_CQ_SOLICITED_MASK;
 952
 953        if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
 954                ret = 1;
 955
 956        spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
 957
 958        return ret;
 959}
 960
 961static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
 962{
 963        struct rxe_dev *rxe = to_rdev(ibpd->device);
 964        struct rxe_pd *pd = to_rpd(ibpd);
 965        struct rxe_mem *mr;
 966        int err;
 967
 968        mr = rxe_alloc(&rxe->mr_pool);
 969        if (!mr) {
 970                err = -ENOMEM;
 971                goto err1;
 972        }
 973
 974        rxe_add_index(mr);
 975
 976        rxe_add_ref(pd);
 977
 978        err = rxe_mem_init_dma(pd, access, mr);
 979        if (err)
 980                goto err2;
 981
 982        return &mr->ibmr;
 983
 984err2:
 985        rxe_drop_ref(pd);
 986        rxe_drop_index(mr);
 987        rxe_drop_ref(mr);
 988err1:
 989        return ERR_PTR(err);
 990}
 991
 992static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
 993                                     u64 start,
 994                                     u64 length,
 995                                     u64 iova,
 996                                     int access, struct ib_udata *udata)
 997{
 998        int err;
 999        struct rxe_dev *rxe = to_rdev(ibpd->device);
1000        struct rxe_pd *pd = to_rpd(ibpd);
1001        struct rxe_mem *mr;
1002
1003        mr = rxe_alloc(&rxe->mr_pool);
1004        if (!mr) {
1005                err = -ENOMEM;
1006                goto err2;
1007        }
1008
1009        rxe_add_index(mr);
1010
1011        rxe_add_ref(pd);
1012
1013        err = rxe_mem_init_user(pd, start, length, iova,
1014                                access, udata, mr);
1015        if (err)
1016                goto err3;
1017
1018        return &mr->ibmr;
1019
1020err3:
1021        rxe_drop_ref(pd);
1022        rxe_drop_index(mr);
1023        rxe_drop_ref(mr);
1024err2:
1025        return ERR_PTR(err);
1026}
1027
1028static int rxe_dereg_mr(struct ib_mr *ibmr)
1029{
1030        struct rxe_mem *mr = to_rmr(ibmr);
1031
1032        mr->state = RXE_MEM_STATE_ZOMBIE;
1033        rxe_drop_ref(mr->pd);
1034        rxe_drop_index(mr);
1035        rxe_drop_ref(mr);
1036        return 0;
1037}
1038
1039static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
1040                                  enum ib_mr_type mr_type,
1041                                  u32 max_num_sg)
1042{
1043        struct rxe_dev *rxe = to_rdev(ibpd->device);
1044        struct rxe_pd *pd = to_rpd(ibpd);
1045        struct rxe_mem *mr;
1046        int err;
1047
1048        if (mr_type != IB_MR_TYPE_MEM_REG)
1049                return ERR_PTR(-EINVAL);
1050
1051        mr = rxe_alloc(&rxe->mr_pool);
1052        if (!mr) {
1053                err = -ENOMEM;
1054                goto err1;
1055        }
1056
1057        rxe_add_index(mr);
1058
1059        rxe_add_ref(pd);
1060
1061        err = rxe_mem_init_fast(pd, max_num_sg, mr);
1062        if (err)
1063                goto err2;
1064
1065        return &mr->ibmr;
1066
1067err2:
1068        rxe_drop_ref(pd);
1069        rxe_drop_index(mr);
1070        rxe_drop_ref(mr);
1071err1:
1072        return ERR_PTR(err);
1073}
1074
1075static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
1076{
1077        struct rxe_mem *mr = to_rmr(ibmr);
1078        struct rxe_map *map;
1079        struct rxe_phys_buf *buf;
1080
1081        if (unlikely(mr->nbuf == mr->num_buf))
1082                return -ENOMEM;
1083
1084        map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
1085        buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
1086
1087        buf->addr = addr;
1088        buf->size = ibmr->page_size;
1089        mr->nbuf++;
1090
1091        return 0;
1092}
1093
1094static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
1095                         int sg_nents, unsigned int *sg_offset)
1096{
1097        struct rxe_mem *mr = to_rmr(ibmr);
1098        int n;
1099
1100        mr->nbuf = 0;
1101
1102        n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);
1103
1104        mr->va = ibmr->iova;
1105        mr->iova = ibmr->iova;
1106        mr->length = ibmr->length;
1107        mr->page_shift = ilog2(ibmr->page_size);
1108        mr->page_mask = ibmr->page_size - 1;
1109        mr->offset = mr->iova & mr->page_mask;
1110
1111        return n;
1112}
1113
1114static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
1115{
1116        int err;
1117        struct rxe_dev *rxe = to_rdev(ibqp->device);
1118        struct rxe_qp *qp = to_rqp(ibqp);
1119        struct rxe_mc_grp *grp;
1120
1121        /* takes a ref on grp if successful */
1122        err = rxe_mcast_get_grp(rxe, mgid, &grp);
1123        if (err)
1124                return err;
1125
1126        err = rxe_mcast_add_grp_elem(rxe, qp, grp);
1127
1128        rxe_drop_ref(grp);
1129        return err;
1130}
1131
1132static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
1133{
1134        struct rxe_dev *rxe = to_rdev(ibqp->device);
1135        struct rxe_qp *qp = to_rqp(ibqp);
1136
1137        return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
1138}
1139
1140static ssize_t parent_show(struct device *device,
1141                           struct device_attribute *attr, char *buf)
1142{
1143        struct rxe_dev *rxe = container_of(device, struct rxe_dev,
1144                                           ib_dev.dev);
1145
1146        return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
1147}
1148
1149static DEVICE_ATTR_RO(parent);
1150
1151static struct device_attribute *rxe_dev_attributes[] = {
1152        &dev_attr_parent,
1153};
1154
1155int rxe_register_device(struct rxe_dev *rxe)
1156{
1157        int err;
1158        int i;
1159        struct ib_device *dev = &rxe->ib_dev;
1160        struct crypto_shash *tfm;
1161
1162        strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
1163        strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
1164
1165        dev->owner = THIS_MODULE;
1166        dev->node_type = RDMA_NODE_IB_CA;
1167        dev->phys_port_cnt = 1;
1168        dev->num_comp_vectors = num_possible_cpus();
1169        dev->dev.parent = rxe_dma_device(rxe);
1170        dev->local_dma_lkey = 0;
1171        addrconf_addr_eui48((unsigned char *)&dev->node_guid,
1172                            rxe->ndev->dev_addr);
1173        dev->dev.dma_ops = &dma_virt_ops;
1174        dma_coerce_mask_and_coherent(&dev->dev,
1175                                     dma_get_required_mask(&dev->dev));
1176
1177        dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
1178        dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
1179            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
1180            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
1181            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
1182            | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
1183            | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
1184            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
1185            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
1186            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
1187            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
1188            | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
1189            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
1190            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
1191            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
1192            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
1193            | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
1194            | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
1195            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
1196            | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
1197            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
1198            | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
1199            | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
1200            | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
1201            | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
1202            | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
1203            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
1204            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
1205            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
1206            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
1207            | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
1208            | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
1209            ;
1210
1211        dev->query_device = rxe_query_device;
1212        dev->modify_device = rxe_modify_device;
1213        dev->query_port = rxe_query_port;
1214        dev->modify_port = rxe_modify_port;
1215        dev->get_link_layer = rxe_get_link_layer;
1216        dev->get_netdev = rxe_get_netdev;
1217        dev->query_pkey = rxe_query_pkey;
1218        dev->alloc_ucontext = rxe_alloc_ucontext;
1219        dev->dealloc_ucontext = rxe_dealloc_ucontext;
1220        dev->mmap = rxe_mmap;
1221        dev->get_port_immutable = rxe_port_immutable;
1222        dev->alloc_pd = rxe_alloc_pd;
1223        dev->dealloc_pd = rxe_dealloc_pd;
1224        dev->create_ah = rxe_create_ah;
1225        dev->modify_ah = rxe_modify_ah;
1226        dev->query_ah = rxe_query_ah;
1227        dev->destroy_ah = rxe_destroy_ah;
1228        dev->create_srq = rxe_create_srq;
1229        dev->modify_srq = rxe_modify_srq;
1230        dev->query_srq = rxe_query_srq;
1231        dev->destroy_srq = rxe_destroy_srq;
1232        dev->post_srq_recv = rxe_post_srq_recv;
1233        dev->create_qp = rxe_create_qp;
1234        dev->modify_qp = rxe_modify_qp;
1235        dev->query_qp = rxe_query_qp;
1236        dev->destroy_qp = rxe_destroy_qp;
1237        dev->post_send = rxe_post_send;
1238        dev->post_recv = rxe_post_recv;
1239        dev->create_cq = rxe_create_cq;
1240        dev->destroy_cq = rxe_destroy_cq;
1241        dev->resize_cq = rxe_resize_cq;
1242        dev->poll_cq = rxe_poll_cq;
1243        dev->peek_cq = rxe_peek_cq;
1244        dev->req_notify_cq = rxe_req_notify_cq;
1245        dev->get_dma_mr = rxe_get_dma_mr;
1246        dev->reg_user_mr = rxe_reg_user_mr;
1247        dev->dereg_mr = rxe_dereg_mr;
1248        dev->alloc_mr = rxe_alloc_mr;
1249        dev->map_mr_sg = rxe_map_mr_sg;
1250        dev->attach_mcast = rxe_attach_mcast;
1251        dev->detach_mcast = rxe_detach_mcast;
1252        dev->get_hw_stats = rxe_ib_get_hw_stats;
1253        dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;
1254
1255        tfm = crypto_alloc_shash("crc32", 0, 0);
1256        if (IS_ERR(tfm)) {
1257                pr_err("failed to allocate crc algorithm err:%ld\n",
1258                       PTR_ERR(tfm));
1259                return PTR_ERR(tfm);
1260        }
1261        rxe->tfm = tfm;
1262
1263        dev->driver_id = RDMA_DRIVER_RXE;
1264        err = ib_register_device(dev, NULL);
1265        if (err) {
1266                pr_warn("%s failed with error %d\n", __func__, err);
1267                goto err1;
1268        }
1269
1270        for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
1271                err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
1272                if (err) {
1273                        pr_warn("%s failed with error %d for attr number %d\n",
1274                                __func__, err, i);
1275                        goto err2;
1276                }
1277        }
1278
1279        return 0;
1280
1281err2:
1282        ib_unregister_device(dev);
1283err1:
1284        crypto_free_shash(rxe->tfm);
1285
1286        return err;
1287}
1288
1289int rxe_unregister_device(struct rxe_dev *rxe)
1290{
1291        int i;
1292        struct ib_device *dev = &rxe->ib_dev;
1293
1294        for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
1295                device_remove_file(&dev->dev, rxe_dev_attributes[i]);
1296
1297        ib_unregister_device(dev);
1298
1299        return 0;
1300}
1301