linux/drivers/infiniband/sw/rxe/rxe_verbs.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
   3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/dma-mapping.h>
  35#include <net/addrconf.h>
  36#include "rxe.h"
  37#include "rxe_loc.h"
  38#include "rxe_queue.h"
  39#include "rxe_hw_counters.h"
  40
  41static int rxe_query_device(struct ib_device *dev,
  42                            struct ib_device_attr *attr,
  43                            struct ib_udata *uhw)
  44{
  45        struct rxe_dev *rxe = to_rdev(dev);
  46
  47        if (uhw->inlen || uhw->outlen)
  48                return -EINVAL;
  49
  50        *attr = rxe->attr;
  51        return 0;
  52}
  53
  54static int rxe_query_port(struct ib_device *dev,
  55                          u8 port_num, struct ib_port_attr *attr)
  56{
  57        struct rxe_dev *rxe = to_rdev(dev);
  58        struct rxe_port *port;
  59        int rc = -EINVAL;
  60
  61        if (unlikely(port_num != 1)) {
  62                pr_warn("invalid port_number %d\n", port_num);
  63                goto out;
  64        }
  65
  66        port = &rxe->port;
  67
  68        /* *attr being zeroed by the caller, avoid zeroing it here */
  69        *attr = port->attr;
  70
  71        mutex_lock(&rxe->usdev_lock);
  72        rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
  73                              &attr->active_width);
  74        mutex_unlock(&rxe->usdev_lock);
  75
  76out:
  77        return rc;
  78}
  79
  80static struct net_device *rxe_get_netdev(struct ib_device *device,
  81                                         u8 port_num)
  82{
  83        struct rxe_dev *rxe = to_rdev(device);
  84
  85        if (rxe->ndev) {
  86                dev_hold(rxe->ndev);
  87                return rxe->ndev;
  88        }
  89
  90        return NULL;
  91}
  92
  93static int rxe_query_pkey(struct ib_device *device,
  94                          u8 port_num, u16 index, u16 *pkey)
  95{
  96        struct rxe_dev *rxe = to_rdev(device);
  97        struct rxe_port *port;
  98
  99        if (unlikely(port_num != 1)) {
 100                dev_warn(device->dev.parent, "invalid port_num = %d\n",
 101                         port_num);
 102                goto err1;
 103        }
 104
 105        port = &rxe->port;
 106
 107        if (unlikely(index >= port->attr.pkey_tbl_len)) {
 108                dev_warn(device->dev.parent, "invalid index = %d\n",
 109                         index);
 110                goto err1;
 111        }
 112
 113        *pkey = port->pkey_tbl[index];
 114        return 0;
 115
 116err1:
 117        return -EINVAL;
 118}
 119
 120static int rxe_modify_device(struct ib_device *dev,
 121                             int mask, struct ib_device_modify *attr)
 122{
 123        struct rxe_dev *rxe = to_rdev(dev);
 124
 125        if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
 126                rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);
 127
 128        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
 129                memcpy(rxe->ib_dev.node_desc,
 130                       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
 131        }
 132
 133        return 0;
 134}
 135
 136static int rxe_modify_port(struct ib_device *dev,
 137                           u8 port_num, int mask, struct ib_port_modify *attr)
 138{
 139        struct rxe_dev *rxe = to_rdev(dev);
 140        struct rxe_port *port;
 141
 142        if (unlikely(port_num != 1)) {
 143                pr_warn("invalid port_num = %d\n", port_num);
 144                goto err1;
 145        }
 146
 147        port = &rxe->port;
 148
 149        port->attr.port_cap_flags |= attr->set_port_cap_mask;
 150        port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;
 151
 152        if (mask & IB_PORT_RESET_QKEY_CNTR)
 153                port->attr.qkey_viol_cntr = 0;
 154
 155        return 0;
 156
 157err1:
 158        return -EINVAL;
 159}
 160
 161static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
 162                                               u8 port_num)
 163{
 164        struct rxe_dev *rxe = to_rdev(dev);
 165
 166        return rxe_link_layer(rxe, port_num);
 167}
 168
 169static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
 170                                              struct ib_udata *udata)
 171{
 172        struct rxe_dev *rxe = to_rdev(dev);
 173        struct rxe_ucontext *uc;
 174
 175        uc = rxe_alloc(&rxe->uc_pool);
 176        return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
 177}
 178
 /*
  * Release a user context by dropping one reference on it.
  * Always returns 0.
  */
 static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
 {
         struct rxe_ucontext *ctx = to_ruc(ibuc);

         rxe_drop_ref(ctx);

         return 0;
 }
 186
 187static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
 188                              struct ib_port_immutable *immutable)
 189{
 190        int err;
 191        struct ib_port_attr attr;
 192
 193        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 194
 195        err = ib_query_port(dev, port_num, &attr);
 196        if (err)
 197                return err;
 198
 199        immutable->pkey_tbl_len = attr.pkey_tbl_len;
 200        immutable->gid_tbl_len = attr.gid_tbl_len;
 201        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 202
 203        return 0;
 204}
 205
 206static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
 207                                  struct ib_ucontext *context,
 208                                  struct ib_udata *udata)
 209{
 210        struct rxe_dev *rxe = to_rdev(dev);
 211        struct rxe_pd *pd;
 212
 213        pd = rxe_alloc(&rxe->pd_pool);
 214        return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
 215}
 216
 /*
  * Release a protection domain by dropping one reference on it.
  * Always returns 0.
  */
 static int rxe_dealloc_pd(struct ib_pd *ibpd)
 {
         struct rxe_pd *old_pd = to_rpd(ibpd);

         rxe_drop_ref(old_pd);

         return 0;
 }
 224
 225static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
 226                       struct rxe_av *av)
 227{
 228        int err;
 229        union ib_gid sgid;
 230        struct ib_gid_attr sgid_attr;
 231
 232        err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr),
 233                                rdma_ah_read_grh(attr)->sgid_index, &sgid,
 234                                &sgid_attr);
 235        if (err) {
 236                pr_err("Failed to query sgid. err = %d\n", err);
 237                return err;
 238        }
 239
 240        rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
 241        rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid);
 242        dev_put(sgid_attr.ndev);
 243        return 0;
 244}
 245
 246static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
 247                                   struct rdma_ah_attr *attr,
 248                                   struct ib_udata *udata)
 249
 250{
 251        int err;
 252        struct rxe_dev *rxe = to_rdev(ibpd->device);
 253        struct rxe_pd *pd = to_rpd(ibpd);
 254        struct rxe_ah *ah;
 255
 256        err = rxe_av_chk_attr(rxe, attr);
 257        if (err)
 258                goto err1;
 259
 260        ah = rxe_alloc(&rxe->ah_pool);
 261        if (!ah) {
 262                err = -ENOMEM;
 263                goto err1;
 264        }
 265
 266        rxe_add_ref(pd);
 267        ah->pd = pd;
 268
 269        err = rxe_init_av(rxe, attr, &ah->av);
 270        if (err)
 271                goto err2;
 272
 273        return &ah->ibah;
 274
 275err2:
 276        rxe_drop_ref(pd);
 277        rxe_drop_ref(ah);
 278err1:
 279        return ERR_PTR(err);
 280}
 281
 282static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 283{
 284        int err;
 285        struct rxe_dev *rxe = to_rdev(ibah->device);
 286        struct rxe_ah *ah = to_rah(ibah);
 287
 288        err = rxe_av_chk_attr(rxe, attr);
 289        if (err)
 290                return err;
 291
 292        err = rxe_init_av(rxe, attr, &ah->av);
 293        if (err)
 294                return err;
 295
 296        return 0;
 297}
 298
 299static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 300{
 301        struct rxe_ah *ah = to_rah(ibah);
 302
 303        memset(attr, 0, sizeof(*attr));
 304        attr->type = ibah->type;
 305        rxe_av_to_attr(&ah->av, attr);
 306        return 0;
 307}
 308
 309static int rxe_destroy_ah(struct ib_ah *ibah)
 310{
 311        struct rxe_ah *ah = to_rah(ibah);
 312
 313        rxe_drop_ref(ah->pd);
 314        rxe_drop_ref(ah);
 315        return 0;
 316}
 317
 318static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
 319{
 320        int err;
 321        int i;
 322        u32 length;
 323        struct rxe_recv_wqe *recv_wqe;
 324        int num_sge = ibwr->num_sge;
 325
 326        if (unlikely(queue_full(rq->queue))) {
 327                err = -ENOMEM;
 328                goto err1;
 329        }
 330
 331        if (unlikely(num_sge > rq->max_sge)) {
 332                err = -EINVAL;
 333                goto err1;
 334        }
 335
 336        length = 0;
 337        for (i = 0; i < num_sge; i++)
 338                length += ibwr->sg_list[i].length;
 339
 340        recv_wqe = producer_addr(rq->queue);
 341        recv_wqe->wr_id = ibwr->wr_id;
 342        recv_wqe->num_sge = num_sge;
 343
 344        memcpy(recv_wqe->dma.sge, ibwr->sg_list,
 345               num_sge * sizeof(struct ib_sge));
 346
 347        recv_wqe->dma.length            = length;
 348        recv_wqe->dma.resid             = length;
 349        recv_wqe->dma.num_sge           = num_sge;
 350        recv_wqe->dma.cur_sge           = 0;
 351        recv_wqe->dma.sge_offset        = 0;
 352
 353        /* make sure all changes to the work queue are written before we
 354         * update the producer pointer
 355         */
 356        smp_wmb();
 357
 358        advance_producer(rq->queue);
 359        return 0;
 360
 361err1:
 362        return err;
 363}
 364
 365static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
 366                                     struct ib_srq_init_attr *init,
 367                                     struct ib_udata *udata)
 368{
 369        int err;
 370        struct rxe_dev *rxe = to_rdev(ibpd->device);
 371        struct rxe_pd *pd = to_rpd(ibpd);
 372        struct rxe_srq *srq;
 373        struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
 374        struct rxe_create_srq_resp __user *uresp = NULL;
 375
 376        if (udata) {
 377                if (udata->outlen < sizeof(*uresp))
 378                        return ERR_PTR(-EINVAL);
 379                uresp = udata->outbuf;
 380        }
 381
 382        err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
 383        if (err)
 384                goto err1;
 385
 386        srq = rxe_alloc(&rxe->srq_pool);
 387        if (!srq) {
 388                err = -ENOMEM;
 389                goto err1;
 390        }
 391
 392        rxe_add_index(srq);
 393        rxe_add_ref(pd);
 394        srq->pd = pd;
 395
 396        err = rxe_srq_from_init(rxe, srq, init, context, uresp);
 397        if (err)
 398                goto err2;
 399
 400        return &srq->ibsrq;
 401
 402err2:
 403        rxe_drop_ref(pd);
 404        rxe_drop_index(srq);
 405        rxe_drop_ref(srq);
 406err1:
 407        return ERR_PTR(err);
 408}
 409
 410static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 411                          enum ib_srq_attr_mask mask,
 412                          struct ib_udata *udata)
 413{
 414        int err;
 415        struct rxe_srq *srq = to_rsrq(ibsrq);
 416        struct rxe_dev *rxe = to_rdev(ibsrq->device);
 417        struct rxe_modify_srq_cmd ucmd = {};
 418
 419        if (udata) {
 420                if (udata->inlen < sizeof(ucmd))
 421                        return -EINVAL;
 422
 423                err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
 424                if (err)
 425                        return err;
 426        }
 427
 428        err = rxe_srq_chk_attr(rxe, srq, attr, mask);
 429        if (err)
 430                goto err1;
 431
 432        err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd);
 433        if (err)
 434                goto err1;
 435
 436        return 0;
 437
 438err1:
 439        return err;
 440}
 441
 442static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
 443{
 444        struct rxe_srq *srq = to_rsrq(ibsrq);
 445
 446        if (srq->error)
 447                return -EINVAL;
 448
 449        attr->max_wr = srq->rq.queue->buf->index_mask;
 450        attr->max_sge = srq->rq.max_sge;
 451        attr->srq_limit = srq->limit;
 452        return 0;
 453}
 454
 455static int rxe_destroy_srq(struct ib_srq *ibsrq)
 456{
 457        struct rxe_srq *srq = to_rsrq(ibsrq);
 458
 459        if (srq->rq.queue)
 460                rxe_queue_cleanup(srq->rq.queue);
 461
 462        rxe_drop_ref(srq->pd);
 463        rxe_drop_index(srq);
 464        rxe_drop_ref(srq);
 465
 466        return 0;
 467}
 468
 469static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 470                             struct ib_recv_wr **bad_wr)
 471{
 472        int err = 0;
 473        unsigned long flags;
 474        struct rxe_srq *srq = to_rsrq(ibsrq);
 475
 476        spin_lock_irqsave(&srq->rq.producer_lock, flags);
 477
 478        while (wr) {
 479                err = post_one_recv(&srq->rq, wr);
 480                if (unlikely(err))
 481                        break;
 482                wr = wr->next;
 483        }
 484
 485        spin_unlock_irqrestore(&srq->rq.producer_lock, flags);
 486
 487        if (err)
 488                *bad_wr = wr;
 489
 490        return err;
 491}
 492
 493static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
 494                                   struct ib_qp_init_attr *init,
 495                                   struct ib_udata *udata)
 496{
 497        int err;
 498        struct rxe_dev *rxe = to_rdev(ibpd->device);
 499        struct rxe_pd *pd = to_rpd(ibpd);
 500        struct rxe_qp *qp;
 501        struct rxe_create_qp_resp __user *uresp = NULL;
 502
 503        if (udata) {
 504                if (udata->outlen < sizeof(*uresp))
 505                        return ERR_PTR(-EINVAL);
 506                uresp = udata->outbuf;
 507        }
 508
 509        err = rxe_qp_chk_init(rxe, init);
 510        if (err)
 511                goto err1;
 512
 513        qp = rxe_alloc(&rxe->qp_pool);
 514        if (!qp) {
 515                err = -ENOMEM;
 516                goto err1;
 517        }
 518
 519        if (udata) {
 520                if (udata->inlen) {
 521                        err = -EINVAL;
 522                        goto err2;
 523                }
 524                qp->is_user = 1;
 525        }
 526
 527        rxe_add_index(qp);
 528
 529        err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd);
 530        if (err)
 531                goto err3;
 532
 533        return &qp->ibqp;
 534
 535err3:
 536        rxe_drop_index(qp);
 537err2:
 538        rxe_drop_ref(qp);
 539err1:
 540        return ERR_PTR(err);
 541}
 542
 543static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 544                         int mask, struct ib_udata *udata)
 545{
 546        int err;
 547        struct rxe_dev *rxe = to_rdev(ibqp->device);
 548        struct rxe_qp *qp = to_rqp(ibqp);
 549
 550        err = rxe_qp_chk_attr(rxe, qp, attr, mask);
 551        if (err)
 552                goto err1;
 553
 554        err = rxe_qp_from_attr(qp, attr, mask, udata);
 555        if (err)
 556                goto err1;
 557
 558        return 0;
 559
 560err1:
 561        return err;
 562}
 563
 /*
  * Report the state of a queue pair: the init attributes are filled in
  * first, then the current attributes.  Always returns 0.
  */
 static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                         int mask, struct ib_qp_init_attr *init)
 {
         struct rxe_qp *queue_pair = to_rqp(ibqp);

         rxe_qp_to_init(queue_pair, init);
         rxe_qp_to_attr(queue_pair, attr, mask);

         return 0;
 }
 574
 /*
  * Destroy a queue pair: run the QP teardown, remove its index and
  * drop the reference on it.  Always returns 0.
  */
 static int rxe_destroy_qp(struct ib_qp *ibqp)
 {
         struct rxe_qp *queue_pair = to_rqp(ibqp);

         rxe_qp_destroy(queue_pair);
         rxe_drop_index(queue_pair);
         rxe_drop_ref(queue_pair);

         return 0;
 }
 584
 585static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 586                            unsigned int mask, unsigned int length)
 587{
 588        int num_sge = ibwr->num_sge;
 589        struct rxe_sq *sq = &qp->sq;
 590
 591        if (unlikely(num_sge > sq->max_sge))
 592                goto err1;
 593
 594        if (unlikely(mask & WR_ATOMIC_MASK)) {
 595                if (length < 8)
 596                        goto err1;
 597
 598                if (atomic_wr(ibwr)->remote_addr & 0x7)
 599                        goto err1;
 600        }
 601
 602        if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
 603                     (length > sq->max_inline)))
 604                goto err1;
 605
 606        return 0;
 607
 608err1:
 609        return -EINVAL;
 610}
 611
 612static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
 613                         struct ib_send_wr *ibwr)
 614{
 615        wr->wr_id = ibwr->wr_id;
 616        wr->num_sge = ibwr->num_sge;
 617        wr->opcode = ibwr->opcode;
 618        wr->send_flags = ibwr->send_flags;
 619
 620        if (qp_type(qp) == IB_QPT_UD ||
 621            qp_type(qp) == IB_QPT_SMI ||
 622            qp_type(qp) == IB_QPT_GSI) {
 623                wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
 624                wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
 625                if (qp_type(qp) == IB_QPT_GSI)
 626                        wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
 627                if (wr->opcode == IB_WR_SEND_WITH_IMM)
 628                        wr->ex.imm_data = ibwr->ex.imm_data;
 629        } else {
 630                switch (wr->opcode) {
 631                case IB_WR_RDMA_WRITE_WITH_IMM:
 632                        wr->ex.imm_data = ibwr->ex.imm_data;
 633                        /* fall through */
 634                case IB_WR_RDMA_READ:
 635                case IB_WR_RDMA_WRITE:
 636                        wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
 637                        wr->wr.rdma.rkey        = rdma_wr(ibwr)->rkey;
 638                        break;
 639                case IB_WR_SEND_WITH_IMM:
 640                        wr->ex.imm_data = ibwr->ex.imm_data;
 641                        break;
 642                case IB_WR_SEND_WITH_INV:
 643                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
 644                        break;
 645                case IB_WR_ATOMIC_CMP_AND_SWP:
 646                case IB_WR_ATOMIC_FETCH_AND_ADD:
 647                        wr->wr.atomic.remote_addr =
 648                                atomic_wr(ibwr)->remote_addr;
 649                        wr->wr.atomic.compare_add =
 650                                atomic_wr(ibwr)->compare_add;
 651                        wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
 652                        wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
 653                        break;
 654                case IB_WR_LOCAL_INV:
 655                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
 656                break;
 657                case IB_WR_REG_MR:
 658                        wr->wr.reg.mr = reg_wr(ibwr)->mr;
 659                        wr->wr.reg.key = reg_wr(ibwr)->key;
 660                        wr->wr.reg.access = reg_wr(ibwr)->access;
 661                break;
 662                default:
 663                        break;
 664                }
 665        }
 666}
 667
 668static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 669                         unsigned int mask, unsigned int length,
 670                         struct rxe_send_wqe *wqe)
 671{
 672        int num_sge = ibwr->num_sge;
 673        struct ib_sge *sge;
 674        int i;
 675        u8 *p;
 676
 677        init_send_wr(qp, &wqe->wr, ibwr);
 678
 679        if (qp_type(qp) == IB_QPT_UD ||
 680            qp_type(qp) == IB_QPT_SMI ||
 681            qp_type(qp) == IB_QPT_GSI)
 682                memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));
 683
 684        if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
 685                p = wqe->dma.inline_data;
 686
 687                sge = ibwr->sg_list;
 688                for (i = 0; i < num_sge; i++, sge++) {
 689                        memcpy(p, (void *)(uintptr_t)sge->addr,
 690                                        sge->length);
 691
 692                        p += sge->length;
 693                }
 694        } else if (mask & WR_REG_MASK) {
 695                wqe->mask = mask;
 696                wqe->state = wqe_state_posted;
 697                return 0;
 698        } else
 699                memcpy(wqe->dma.sge, ibwr->sg_list,
 700                       num_sge * sizeof(struct ib_sge));
 701
 702        wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
 703                mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
 704        wqe->mask               = mask;
 705        wqe->dma.length         = length;
 706        wqe->dma.resid          = length;
 707        wqe->dma.num_sge        = num_sge;
 708        wqe->dma.cur_sge        = 0;
 709        wqe->dma.sge_offset     = 0;
 710        wqe->state              = wqe_state_posted;
 711        wqe->ssn                = atomic_add_return(1, &qp->ssn);
 712
 713        return 0;
 714}
 715
 716static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 717                         unsigned int mask, u32 length)
 718{
 719        int err;
 720        struct rxe_sq *sq = &qp->sq;
 721        struct rxe_send_wqe *send_wqe;
 722        unsigned long flags;
 723
 724        err = validate_send_wr(qp, ibwr, mask, length);
 725        if (err)
 726                return err;
 727
 728        spin_lock_irqsave(&qp->sq.sq_lock, flags);
 729
 730        if (unlikely(queue_full(sq->queue))) {
 731                err = -ENOMEM;
 732                goto err1;
 733        }
 734
 735        send_wqe = producer_addr(sq->queue);
 736
 737        err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
 738        if (unlikely(err))
 739                goto err1;
 740
 741        /*
 742         * make sure all changes to the work queue are
 743         * written before we update the producer pointer
 744         */
 745        smp_wmb();
 746
 747        advance_producer(sq->queue);
 748        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 749
 750        return 0;
 751
 752err1:
 753        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
 754        return err;
 755}
 756
 757static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
 758                                struct ib_send_wr **bad_wr)
 759{
 760        int err = 0;
 761        unsigned int mask;
 762        unsigned int length = 0;
 763        int i;
 764
 765        while (wr) {
 766                mask = wr_opcode_mask(wr->opcode, qp);
 767                if (unlikely(!mask)) {
 768                        err = -EINVAL;
 769                        *bad_wr = wr;
 770                        break;
 771                }
 772
 773                if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
 774                             !(mask & WR_INLINE_MASK))) {
 775                        err = -EINVAL;
 776                        *bad_wr = wr;
 777                        break;
 778                }
 779
 780                length = 0;
 781                for (i = 0; i < wr->num_sge; i++)
 782                        length += wr->sg_list[i].length;
 783
 784                err = post_one_send(qp, wr, mask, length);
 785
 786                if (err) {
 787                        *bad_wr = wr;
 788                        break;
 789                }
 790                wr = wr->next;
 791        }
 792
 793        rxe_run_task(&qp->req.task, 1);
 794        if (unlikely(qp->req.state == QP_STATE_ERROR))
 795                rxe_run_task(&qp->comp.task, 1);
 796
 797        return err;
 798}
 799
 800static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 801                         struct ib_send_wr **bad_wr)
 802{
 803        struct rxe_qp *qp = to_rqp(ibqp);
 804
 805        if (unlikely(!qp->valid)) {
 806                *bad_wr = wr;
 807                return -EINVAL;
 808        }
 809
 810        if (unlikely(qp->req.state < QP_STATE_READY)) {
 811                *bad_wr = wr;
 812                return -EINVAL;
 813        }
 814
 815        if (qp->is_user) {
 816                /* Utilize process context to do protocol processing */
 817                rxe_run_task(&qp->req.task, 0);
 818                return 0;
 819        } else
 820                return rxe_post_send_kernel(qp, wr, bad_wr);
 821}
 822
 823static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 824                         struct ib_recv_wr **bad_wr)
 825{
 826        int err = 0;
 827        struct rxe_qp *qp = to_rqp(ibqp);
 828        struct rxe_rq *rq = &qp->rq;
 829        unsigned long flags;
 830
 831        if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
 832                *bad_wr = wr;
 833                err = -EINVAL;
 834                goto err1;
 835        }
 836
 837        if (unlikely(qp->srq)) {
 838                *bad_wr = wr;
 839                err = -EINVAL;
 840                goto err1;
 841        }
 842
 843        spin_lock_irqsave(&rq->producer_lock, flags);
 844
 845        while (wr) {
 846                err = post_one_recv(rq, wr);
 847                if (unlikely(err)) {
 848                        *bad_wr = wr;
 849                        break;
 850                }
 851                wr = wr->next;
 852        }
 853
 854        spin_unlock_irqrestore(&rq->producer_lock, flags);
 855
 856        if (qp->resp.state == QP_STATE_ERROR)
 857                rxe_run_task(&qp->resp.task, 1);
 858
 859err1:
 860        return err;
 861}
 862
 863static struct ib_cq *rxe_create_cq(struct ib_device *dev,
 864                                   const struct ib_cq_init_attr *attr,
 865                                   struct ib_ucontext *context,
 866                                   struct ib_udata *udata)
 867{
 868        int err;
 869        struct rxe_dev *rxe = to_rdev(dev);
 870        struct rxe_cq *cq;
 871        struct rxe_create_cq_resp __user *uresp = NULL;
 872
 873        if (udata) {
 874                if (udata->outlen < sizeof(*uresp))
 875                        return ERR_PTR(-EINVAL);
 876                uresp = udata->outbuf;
 877        }
 878
 879        if (attr->flags)
 880                return ERR_PTR(-EINVAL);
 881
 882        err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
 883        if (err)
 884                goto err1;
 885
 886        cq = rxe_alloc(&rxe->cq_pool);
 887        if (!cq) {
 888                err = -ENOMEM;
 889                goto err1;
 890        }
 891
 892        err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
 893                               context, uresp);
 894        if (err)
 895                goto err2;
 896
 897        return &cq->ibcq;
 898
 899err2:
 900        rxe_drop_ref(cq);
 901err1:
 902        return ERR_PTR(err);
 903}
 904
 /*
  * Destroy a completion queue: disable it, then drop the reference on
  * it.  Always returns 0.
  */
 static int rxe_destroy_cq(struct ib_cq *ibcq)
 {
         struct rxe_cq *comp_q = to_rcq(ibcq);

         rxe_cq_disable(comp_q);
         rxe_drop_ref(comp_q);

         return 0;
 }
 914
 915static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 916{
 917        int err;
 918        struct rxe_cq *cq = to_rcq(ibcq);
 919        struct rxe_dev *rxe = to_rdev(ibcq->device);
 920        struct rxe_resize_cq_resp __user *uresp = NULL;
 921
 922        if (udata) {
 923                if (udata->outlen < sizeof(*uresp))
 924                        return -EINVAL;
 925                uresp = udata->outbuf;
 926        }
 927
 928        err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
 929        if (err)
 930                goto err1;
 931
 932        err = rxe_cq_resize_queue(cq, cqe, uresp);
 933        if (err)
 934                goto err1;
 935
 936        return 0;
 937
 938err1:
 939        return err;
 940}
 941
 942static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 943{
 944        int i;
 945        struct rxe_cq *cq = to_rcq(ibcq);
 946        struct rxe_cqe *cqe;
 947        unsigned long flags;
 948
 949        spin_lock_irqsave(&cq->cq_lock, flags);
 950        for (i = 0; i < num_entries; i++) {
 951                cqe = queue_head(cq->queue);
 952                if (!cqe)
 953                        break;
 954
 955                memcpy(wc++, &cqe->ibwc, sizeof(*wc));
 956                advance_consumer(cq->queue);
 957        }
 958        spin_unlock_irqrestore(&cq->cq_lock, flags);
 959
 960        return i;
 961}
 962
 963static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
 964{
 965        struct rxe_cq *cq = to_rcq(ibcq);
 966        int count = queue_count(cq->queue);
 967
 968        return (count > wc_cnt) ? wc_cnt : count;
 969}
 970
 971static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 972{
 973        struct rxe_cq *cq = to_rcq(ibcq);
 974        unsigned long irq_flags;
 975        int ret = 0;
 976
 977        spin_lock_irqsave(&cq->cq_lock, irq_flags);
 978        if (cq->notify != IB_CQ_NEXT_COMP)
 979                cq->notify = flags & IB_CQ_SOLICITED_MASK;
 980
 981        if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
 982                ret = 1;
 983
 984        spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
 985
 986        return ret;
 987}
 988
 989static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
 990{
 991        struct rxe_dev *rxe = to_rdev(ibpd->device);
 992        struct rxe_pd *pd = to_rpd(ibpd);
 993        struct rxe_mem *mr;
 994        int err;
 995
 996        mr = rxe_alloc(&rxe->mr_pool);
 997        if (!mr) {
 998                err = -ENOMEM;
 999                goto err1;
1000        }
1001
1002        rxe_add_index(mr);
1003
1004        rxe_add_ref(pd);
1005
1006        err = rxe_mem_init_dma(pd, access, mr);
1007        if (err)
1008                goto err2;
1009
1010        return &mr->ibmr;
1011
1012err2:
1013        rxe_drop_ref(pd);
1014        rxe_drop_index(mr);
1015        rxe_drop_ref(mr);
1016err1:
1017        return ERR_PTR(err);
1018}
1019
1020static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
1021                                     u64 start,
1022                                     u64 length,
1023                                     u64 iova,
1024                                     int access, struct ib_udata *udata)
1025{
1026        int err;
1027        struct rxe_dev *rxe = to_rdev(ibpd->device);
1028        struct rxe_pd *pd = to_rpd(ibpd);
1029        struct rxe_mem *mr;
1030
1031        mr = rxe_alloc(&rxe->mr_pool);
1032        if (!mr) {
1033                err = -ENOMEM;
1034                goto err2;
1035        }
1036
1037        rxe_add_index(mr);
1038
1039        rxe_add_ref(pd);
1040
1041        err = rxe_mem_init_user(pd, start, length, iova,
1042                                access, udata, mr);
1043        if (err)
1044                goto err3;
1045
1046        return &mr->ibmr;
1047
1048err3:
1049        rxe_drop_ref(pd);
1050        rxe_drop_index(mr);
1051        rxe_drop_ref(mr);
1052err2:
1053        return ERR_PTR(err);
1054}
1055
1056static int rxe_dereg_mr(struct ib_mr *ibmr)
1057{
1058        struct rxe_mem *mr = to_rmr(ibmr);
1059
1060        mr->state = RXE_MEM_STATE_ZOMBIE;
1061        rxe_drop_ref(mr->pd);
1062        rxe_drop_index(mr);
1063        rxe_drop_ref(mr);
1064        return 0;
1065}
1066
1067static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
1068                                  enum ib_mr_type mr_type,
1069                                  u32 max_num_sg)
1070{
1071        struct rxe_dev *rxe = to_rdev(ibpd->device);
1072        struct rxe_pd *pd = to_rpd(ibpd);
1073        struct rxe_mem *mr;
1074        int err;
1075
1076        if (mr_type != IB_MR_TYPE_MEM_REG)
1077                return ERR_PTR(-EINVAL);
1078
1079        mr = rxe_alloc(&rxe->mr_pool);
1080        if (!mr) {
1081                err = -ENOMEM;
1082                goto err1;
1083        }
1084
1085        rxe_add_index(mr);
1086
1087        rxe_add_ref(pd);
1088
1089        err = rxe_mem_init_fast(pd, max_num_sg, mr);
1090        if (err)
1091                goto err2;
1092
1093        return &mr->ibmr;
1094
1095err2:
1096        rxe_drop_ref(pd);
1097        rxe_drop_index(mr);
1098        rxe_drop_ref(mr);
1099err1:
1100        return ERR_PTR(err);
1101}
1102
1103static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
1104{
1105        struct rxe_mem *mr = to_rmr(ibmr);
1106        struct rxe_map *map;
1107        struct rxe_phys_buf *buf;
1108
1109        if (unlikely(mr->nbuf == mr->num_buf))
1110                return -ENOMEM;
1111
1112        map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
1113        buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
1114
1115        buf->addr = addr;
1116        buf->size = ibmr->page_size;
1117        mr->nbuf++;
1118
1119        return 0;
1120}
1121
1122static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
1123                         int sg_nents, unsigned int *sg_offset)
1124{
1125        struct rxe_mem *mr = to_rmr(ibmr);
1126        int n;
1127
1128        mr->nbuf = 0;
1129
1130        n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);
1131
1132        mr->va = ibmr->iova;
1133        mr->iova = ibmr->iova;
1134        mr->length = ibmr->length;
1135        mr->page_shift = ilog2(ibmr->page_size);
1136        mr->page_mask = ibmr->page_size - 1;
1137        mr->offset = mr->iova & mr->page_mask;
1138
1139        return n;
1140}
1141
1142static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
1143{
1144        int err;
1145        struct rxe_dev *rxe = to_rdev(ibqp->device);
1146        struct rxe_qp *qp = to_rqp(ibqp);
1147        struct rxe_mc_grp *grp;
1148
1149        /* takes a ref on grp if successful */
1150        err = rxe_mcast_get_grp(rxe, mgid, &grp);
1151        if (err)
1152                return err;
1153
1154        err = rxe_mcast_add_grp_elem(rxe, qp, grp);
1155
1156        rxe_drop_ref(grp);
1157        return err;
1158}
1159
1160static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
1161{
1162        struct rxe_dev *rxe = to_rdev(ibqp->device);
1163        struct rxe_qp *qp = to_rqp(ibqp);
1164
1165        return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
1166}
1167
1168static ssize_t parent_show(struct device *device,
1169                           struct device_attribute *attr, char *buf)
1170{
1171        struct rxe_dev *rxe = container_of(device, struct rxe_dev,
1172                                           ib_dev.dev);
1173
1174        return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
1175}
1176
1177static DEVICE_ATTR_RO(parent);
1178
1179static struct device_attribute *rxe_dev_attributes[] = {
1180        &dev_attr_parent,
1181};
1182
1183int rxe_register_device(struct rxe_dev *rxe)
1184{
1185        int err;
1186        int i;
1187        struct ib_device *dev = &rxe->ib_dev;
1188        struct crypto_shash *tfm;
1189
1190        strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
1191        strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
1192
1193        dev->owner = THIS_MODULE;
1194        dev->node_type = RDMA_NODE_IB_CA;
1195        dev->phys_port_cnt = 1;
1196        dev->num_comp_vectors = num_possible_cpus();
1197        dev->dev.parent = rxe_dma_device(rxe);
1198        dev->local_dma_lkey = 0;
1199        addrconf_addr_eui48((unsigned char *)&dev->node_guid,
1200                            rxe->ndev->dev_addr);
1201        dev->dev.dma_ops = &dma_virt_ops;
1202        dma_coerce_mask_and_coherent(&dev->dev,
1203                                     dma_get_required_mask(&dev->dev));
1204
1205        dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
1206        dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
1207            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
1208            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
1209            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
1210            | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
1211            | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
1212            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
1213            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
1214            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
1215            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
1216            | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
1217            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
1218            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
1219            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
1220            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
1221            | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
1222            | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
1223            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
1224            | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
1225            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
1226            | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
1227            | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
1228            | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
1229            | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
1230            | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
1231            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
1232            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
1233            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
1234            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
1235            | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
1236            | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
1237            ;
1238
1239        dev->query_device = rxe_query_device;
1240        dev->modify_device = rxe_modify_device;
1241        dev->query_port = rxe_query_port;
1242        dev->modify_port = rxe_modify_port;
1243        dev->get_link_layer = rxe_get_link_layer;
1244        dev->get_netdev = rxe_get_netdev;
1245        dev->query_pkey = rxe_query_pkey;
1246        dev->alloc_ucontext = rxe_alloc_ucontext;
1247        dev->dealloc_ucontext = rxe_dealloc_ucontext;
1248        dev->mmap = rxe_mmap;
1249        dev->get_port_immutable = rxe_port_immutable;
1250        dev->alloc_pd = rxe_alloc_pd;
1251        dev->dealloc_pd = rxe_dealloc_pd;
1252        dev->create_ah = rxe_create_ah;
1253        dev->modify_ah = rxe_modify_ah;
1254        dev->query_ah = rxe_query_ah;
1255        dev->destroy_ah = rxe_destroy_ah;
1256        dev->create_srq = rxe_create_srq;
1257        dev->modify_srq = rxe_modify_srq;
1258        dev->query_srq = rxe_query_srq;
1259        dev->destroy_srq = rxe_destroy_srq;
1260        dev->post_srq_recv = rxe_post_srq_recv;
1261        dev->create_qp = rxe_create_qp;
1262        dev->modify_qp = rxe_modify_qp;
1263        dev->query_qp = rxe_query_qp;
1264        dev->destroy_qp = rxe_destroy_qp;
1265        dev->post_send = rxe_post_send;
1266        dev->post_recv = rxe_post_recv;
1267        dev->create_cq = rxe_create_cq;
1268        dev->destroy_cq = rxe_destroy_cq;
1269        dev->resize_cq = rxe_resize_cq;
1270        dev->poll_cq = rxe_poll_cq;
1271        dev->peek_cq = rxe_peek_cq;
1272        dev->req_notify_cq = rxe_req_notify_cq;
1273        dev->get_dma_mr = rxe_get_dma_mr;
1274        dev->reg_user_mr = rxe_reg_user_mr;
1275        dev->dereg_mr = rxe_dereg_mr;
1276        dev->alloc_mr = rxe_alloc_mr;
1277        dev->map_mr_sg = rxe_map_mr_sg;
1278        dev->attach_mcast = rxe_attach_mcast;
1279        dev->detach_mcast = rxe_detach_mcast;
1280        dev->get_hw_stats = rxe_ib_get_hw_stats;
1281        dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;
1282
1283        tfm = crypto_alloc_shash("crc32", 0, 0);
1284        if (IS_ERR(tfm)) {
1285                pr_err("failed to allocate crc algorithm err:%ld\n",
1286                       PTR_ERR(tfm));
1287                return PTR_ERR(tfm);
1288        }
1289        rxe->tfm = tfm;
1290
1291        dev->driver_id = RDMA_DRIVER_RXE;
1292        err = ib_register_device(dev, NULL);
1293        if (err) {
1294                pr_warn("%s failed with error %d\n", __func__, err);
1295                goto err1;
1296        }
1297
1298        for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
1299                err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
1300                if (err) {
1301                        pr_warn("%s failed with error %d for attr number %d\n",
1302                                __func__, err, i);
1303                        goto err2;
1304                }
1305        }
1306
1307        return 0;
1308
1309err2:
1310        ib_unregister_device(dev);
1311err1:
1312        crypto_free_shash(rxe->tfm);
1313
1314        return err;
1315}
1316
1317int rxe_unregister_device(struct rxe_dev *rxe)
1318{
1319        int i;
1320        struct ib_device *dev = &rxe->ib_dev;
1321
1322        for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
1323                device_remove_file(&dev->dev, rxe_dev_attributes[i]);
1324
1325        ib_unregister_device(dev);
1326
1327        return 0;
1328}
1329