linux/drivers/infiniband/sw/rxe/rxe_verbs.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include <rdma/uverbs_ioctl.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

static int rxe_query_device(struct ib_device *dev,
                            struct ib_device_attr *attr,
                            struct ib_udata *uhw)
{
        struct rxe_dev *rxe = to_rdev(dev);

        if (uhw->inlen || uhw->outlen)
                return -EINVAL;

        *attr = rxe->attr;
        return 0;
}

static int rxe_query_port(struct ib_device *dev,
                          u32 port_num, struct ib_port_attr *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_port *port;
        int rc;

        port = &rxe->port;

        /* *attr being zeroed by the caller, avoid zeroing it here */
        *attr = port->attr;

        mutex_lock(&rxe->usdev_lock);
        rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
                              &attr->active_width);

        if (attr->state == IB_PORT_ACTIVE)
                attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
        else if (dev_get_flags(rxe->ndev) & IFF_UP)
                attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
        else
                attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;

        mutex_unlock(&rxe->usdev_lock);

        return rc;
}

static int rxe_query_pkey(struct ib_device *device,
                          u32 port_num, u16 index, u16 *pkey)
{
        if (index > 0)
                return -EINVAL;

        *pkey = IB_DEFAULT_PKEY_FULL;
        return 0;
}

static int rxe_modify_device(struct ib_device *dev,
                             int mask, struct ib_device_modify *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);

        if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
                     IB_DEVICE_MODIFY_NODE_DESC))
                return -EOPNOTSUPP;

        if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
                rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
                memcpy(rxe->ib_dev.node_desc,
                       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
        }

        return 0;
}

static int rxe_modify_port(struct ib_device *dev,
                           u32 port_num, int mask, struct ib_port_modify *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_port *port;

        port = &rxe->port;

        port->attr.port_cap_flags |= attr->set_port_cap_mask;
        port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

        if (mask & IB_PORT_RESET_QKEY_CNTR)
                port->attr.qkey_viol_cntr = 0;

        return 0;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
                                               u32 port_num)
{
        return IB_LINK_LAYER_ETHERNET;
}

static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(ibuc->device);
        struct rxe_ucontext *uc = to_ruc(ibuc);

        return rxe_add_to_pool(&rxe->uc_pool, uc);
}

static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
        struct rxe_ucontext *uc = to_ruc(ibuc);

        rxe_drop_ref(uc);
}

static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
                              struct ib_port_immutable *immutable)
{
        int err;
        struct ib_port_attr attr;

        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

        err = ib_query_port(dev, port_num, &attr);
        if (err)
                return err;

        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;

        return 0;
}

static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);

        return rxe_add_to_pool(&rxe->pd_pool, pd);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
        struct rxe_pd *pd = to_rpd(ibpd);

        rxe_drop_ref(pd);
        return 0;
}

static int rxe_create_ah(struct ib_ah *ibah,
                         struct rdma_ah_init_attr *init_attr,
                         struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibah->device);
        struct rxe_ah *ah = to_rah(ibah);

        err = rxe_av_chk_attr(rxe, init_attr->ah_attr);
        if (err)
                return err;

        err = rxe_add_to_pool(&rxe->ah_pool, ah);
        if (err)
                return err;

        rxe_init_av(init_attr->ah_attr, &ah->av);
        return 0;
}

static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibah->device);
        struct rxe_ah *ah = to_rah(ibah);

        err = rxe_av_chk_attr(rxe, attr);
        if (err)
                return err;

        rxe_init_av(attr, &ah->av);
        return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
        struct rxe_ah *ah = to_rah(ibah);

        memset(attr, 0, sizeof(*attr));
        attr->type = ibah->type;
        rxe_av_to_attr(&ah->av, attr);
        return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
        struct rxe_ah *ah = to_rah(ibah);

        rxe_drop_ref(ah);
        return 0;
}

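/* Copy one receive work request into the next free slot of the receive
 * queue. Fails with -ENOMEM if the queue is full and -EINVAL if the WR
 * carries more SGEs than the queue supports.
 */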
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
        int err;
        int i;
        u32 length;
        struct rxe_recv_wqe *recv_wqe;
        int num_sge = ibwr->num_sge;
        int full;

        if (rq->is_user)
                full = queue_full(rq->queue, QUEUE_TYPE_FROM_USER);
        else
                full = queue_full(rq->queue, QUEUE_TYPE_KERNEL);

        if (unlikely(full)) {
                err = -ENOMEM;
                goto err1;
        }

        if (unlikely(num_sge > rq->max_sge)) {
                err = -EINVAL;
                goto err1;
        }

        length = 0;
        for (i = 0; i < num_sge; i++)
                length += ibwr->sg_list[i].length;

        if (rq->is_user)
                recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_FROM_USER);
        else
                recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_KERNEL);

        recv_wqe->wr_id = ibwr->wr_id;
        recv_wqe->num_sge = num_sge;

        memcpy(recv_wqe->dma.sge, ibwr->sg_list,
               num_sge * sizeof(struct ib_sge));

        recv_wqe->dma.length            = length;
        recv_wqe->dma.resid             = length;
        recv_wqe->dma.num_sge           = num_sge;
        recv_wqe->dma.cur_sge           = 0;
        recv_wqe->dma.sge_offset        = 0;

        if (rq->is_user)
                advance_producer(rq->queue, QUEUE_TYPE_FROM_USER);
        else
                advance_producer(rq->queue, QUEUE_TYPE_KERNEL);

        return 0;

err1:
        return err;
}

static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
                          struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibsrq->device);
        struct rxe_pd *pd = to_rpd(ibsrq->pd);
        struct rxe_srq *srq = to_rsrq(ibsrq);
        struct rxe_create_srq_resp __user *uresp = NULL;

        if (init->srq_type != IB_SRQT_BASIC)
                return -EOPNOTSUPP;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
                srq->is_user = true;
        } else {
                srq->is_user = false;
        }

        err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
        if (err)
                goto err1;

        err = rxe_add_to_pool(&rxe->srq_pool, srq);
        if (err)
                goto err1;

        rxe_add_ref(pd);
        srq->pd = pd;

        err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
        if (err)
                goto err2;

        return 0;

err2:
        rxe_drop_ref(pd);
        rxe_drop_ref(srq);
err1:
        return err;
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                          enum ib_srq_attr_mask mask,
                          struct ib_udata *udata)
{
        int err;
        struct rxe_srq *srq = to_rsrq(ibsrq);
        struct rxe_dev *rxe = to_rdev(ibsrq->device);
        struct rxe_modify_srq_cmd ucmd = {};

        if (udata) {
                if (udata->inlen < sizeof(ucmd))
                        return -EINVAL;

                err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
                if (err)
                        return err;
        }

        err = rxe_srq_chk_attr(rxe, srq, attr, mask);
        if (err)
                goto err1;

        err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
        if (err)
                goto err1;

        return 0;

err1:
        return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);

        if (srq->error)
                return -EINVAL;

        attr->max_wr = srq->rq.queue->buf->index_mask;
        attr->max_sge = srq->rq.max_sge;
        attr->srq_limit = srq->limit;
        return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);

        if (srq->rq.queue)
                rxe_queue_cleanup(srq->rq.queue);

        rxe_drop_ref(srq->pd);
        rxe_drop_ref(srq);
        return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
                             const struct ib_recv_wr **bad_wr)
{
        int err = 0;
        unsigned long flags;
        struct rxe_srq *srq = to_rsrq(ibsrq);

        spin_lock_irqsave(&srq->rq.producer_lock, flags);

        while (wr) {
                err = post_one_recv(&srq->rq, wr);
                if (unlikely(err))
                        break;
                wr = wr->next;
        }

        spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

        if (err)
                *bad_wr = wr;

        return err;
}

static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
                         struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_pd *pd = to_rpd(ibqp->pd);
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_create_qp_resp __user *uresp = NULL;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
        }

        if (init->create_flags)
                return -EOPNOTSUPP;

        err = rxe_qp_chk_init(rxe, init);
        if (err)
                return err;

        if (udata) {
                if (udata->inlen)
                        return -EINVAL;

                qp->is_user = true;
        } else {
                qp->is_user = false;
        }

        err = rxe_add_to_pool(&rxe->qp_pool, qp);
        if (err)
                return err;

        rxe_add_index(qp);
        err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
        if (err)
                goto qp_init;

        return 0;

qp_init:
        rxe_drop_index(qp);
        rxe_drop_ref(qp);
        return err;
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                         int mask, struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);

        if (mask & ~IB_QP_ATTR_STANDARD_BITS)
                return -EOPNOTSUPP;

        err = rxe_qp_chk_attr(rxe, qp, attr, mask);
        if (err)
                goto err1;

        err = rxe_qp_from_attr(qp, attr, mask, udata);
        if (err)
                goto err1;

        return 0;

err1:
        return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                        int mask, struct ib_qp_init_attr *init)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        rxe_qp_to_init(qp, init);
        rxe_qp_to_attr(qp, attr, mask);

        return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        rxe_qp_destroy(qp);
        rxe_drop_index(qp);
        rxe_drop_ref(qp);
        return 0;
}

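/* Sanity checks on a send WR before it is copied into the send queue:
 * SGE count against sq->max_sge, 8-byte length and alignment for atomic
 * operations, and payload length against sq->max_inline for inline sends.
 */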
static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
                            unsigned int mask, unsigned int length)
{
        int num_sge = ibwr->num_sge;
        struct rxe_sq *sq = &qp->sq;

        if (unlikely(num_sge > sq->max_sge))
                goto err1;

        if (unlikely(mask & WR_ATOMIC_MASK)) {
                if (length < 8)
                        goto err1;

                if (atomic_wr(ibwr)->remote_addr & 0x7)
                        goto err1;
        }

        if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
                     (length > sq->max_inline)))
                goto err1;

        return 0;

err1:
        return -EINVAL;
}

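/* Translate the verbs ib_send_wr into the rxe_send_wr stored in the WQE,
 * copying the opcode-specific fields (UD, RDMA, atomic, invalidate and
 * MR registration) as needed.
 */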
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
                         const struct ib_send_wr *ibwr)
{
        wr->wr_id = ibwr->wr_id;
        wr->num_sge = ibwr->num_sge;
        wr->opcode = ibwr->opcode;
        wr->send_flags = ibwr->send_flags;

        if (qp_type(qp) == IB_QPT_UD ||
            qp_type(qp) == IB_QPT_SMI ||
            qp_type(qp) == IB_QPT_GSI) {
                wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
                wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
                if (qp_type(qp) == IB_QPT_GSI)
                        wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
                if (wr->opcode == IB_WR_SEND_WITH_IMM)
                        wr->ex.imm_data = ibwr->ex.imm_data;
        } else {
                switch (wr->opcode) {
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        wr->ex.imm_data = ibwr->ex.imm_data;
                        fallthrough;
                case IB_WR_RDMA_READ:
                case IB_WR_RDMA_WRITE:
                        wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
                        wr->wr.rdma.rkey        = rdma_wr(ibwr)->rkey;
                        break;
                case IB_WR_SEND_WITH_IMM:
                        wr->ex.imm_data = ibwr->ex.imm_data;
                        break;
                case IB_WR_SEND_WITH_INV:
                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
                        break;
                case IB_WR_ATOMIC_CMP_AND_SWP:
                case IB_WR_ATOMIC_FETCH_AND_ADD:
                        wr->wr.atomic.remote_addr =
                                atomic_wr(ibwr)->remote_addr;
                        wr->wr.atomic.compare_add =
                                atomic_wr(ibwr)->compare_add;
                        wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
                        wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
                        break;
                case IB_WR_LOCAL_INV:
                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
                        break;
                case IB_WR_REG_MR:
                        wr->wr.reg.mr = reg_wr(ibwr)->mr;
                        wr->wr.reg.key = reg_wr(ibwr)->key;
                        wr->wr.reg.access = reg_wr(ibwr)->access;
                        break;
                default:
                        break;
                }
        }
}

static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
                                    const struct ib_send_wr *ibwr)
{
        struct ib_sge *sge = ibwr->sg_list;
        u8 *p = wqe->dma.inline_data;
        int i;

        for (i = 0; i < ibwr->num_sge; i++, sge++) {
                memcpy(p, (void *)(uintptr_t)sge->addr, sge->length);
                p += sge->length;
        }
}

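/* Fill in a send WQE from the posted WR: header fields via init_send_wr(),
 * the AV for UD/SMI/GSI QPs, either inline data or the SGE list, and the
 * DMA state used by the requester.
 */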
static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
                          unsigned int mask, unsigned int length,
                          struct rxe_send_wqe *wqe)
{
        int num_sge = ibwr->num_sge;

        init_send_wr(qp, &wqe->wr, ibwr);

        /* local operation */
        if (unlikely(mask & WR_LOCAL_OP_MASK)) {
                wqe->mask = mask;
                wqe->state = wqe_state_posted;
                return;
        }

        if (qp_type(qp) == IB_QPT_UD ||
            qp_type(qp) == IB_QPT_SMI ||
            qp_type(qp) == IB_QPT_GSI)
                memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

        if (unlikely(ibwr->send_flags & IB_SEND_INLINE))
                copy_inline_data_to_wqe(wqe, ibwr);
        else
                memcpy(wqe->dma.sge, ibwr->sg_list,
                       num_sge * sizeof(struct ib_sge));

        wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
                mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
        wqe->mask               = mask;
        wqe->dma.length         = length;
        wqe->dma.resid          = length;
        wqe->dma.num_sge        = num_sge;
        wqe->dma.cur_sge        = 0;
        wqe->dma.sge_offset     = 0;
        wqe->state              = wqe_state_posted;
        wqe->ssn                = atomic_add_return(1, &qp->ssn);
}

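/* Validate one send WR and copy it into the next free send queue slot
 * under the SQ lock. Returns -ENOMEM if the send queue is full.
 */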
static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
                         unsigned int mask, u32 length)
{
        int err;
        struct rxe_sq *sq = &qp->sq;
        struct rxe_send_wqe *send_wqe;
        unsigned long flags;
        int full;

        err = validate_send_wr(qp, ibwr, mask, length);
        if (err)
                return err;

        spin_lock_irqsave(&qp->sq.sq_lock, flags);

        if (qp->is_user)
                full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER);
        else
                full = queue_full(sq->queue, QUEUE_TYPE_KERNEL);

        if (unlikely(full)) {
                spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
                return -ENOMEM;
        }

        if (qp->is_user)
                send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER);
        else
                send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);

        init_send_wqe(qp, ibwr, mask, length, send_wqe);

        if (qp->is_user)
                advance_producer(sq->queue, QUEUE_TYPE_FROM_USER);
        else
                advance_producer(sq->queue, QUEUE_TYPE_KERNEL);

        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

        return 0;
}

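/* Post a chain of send WRs for a kernel QP and kick the requester task.
 * On the first failure *bad_wr points at the offending WR and its
 * successors are not posted.
 */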
static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
                                const struct ib_send_wr **bad_wr)
{
        int err = 0;
        unsigned int mask;
        unsigned int length = 0;
        int i;
        struct ib_send_wr *next;

        while (wr) {
                mask = wr_opcode_mask(wr->opcode, qp);
                if (unlikely(!mask)) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
                             !(mask & WR_INLINE_MASK))) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                next = wr->next;

                length = 0;
                for (i = 0; i < wr->num_sge; i++)
                        length += wr->sg_list[i].length;

                err = post_one_send(qp, wr, mask, length);

                if (err) {
                        *bad_wr = wr;
                        break;
                }
                wr = next;
        }

        rxe_run_task(&qp->req.task, 1);
        if (unlikely(qp->req.state == QP_STATE_ERROR))
                rxe_run_task(&qp->comp.task, 1);

        return err;
}

static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                         const struct ib_send_wr **bad_wr)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        if (unlikely(!qp->valid)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (unlikely(qp->req.state < QP_STATE_READY)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (qp->is_user) {
                /* Utilize process context to do protocol processing */
                rxe_run_task(&qp->req.task, 0);
                return 0;
        }

        return rxe_post_send_kernel(qp, wr, bad_wr);
}

static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
                         const struct ib_recv_wr **bad_wr)
{
        int err = 0;
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_rq *rq = &qp->rq;
        unsigned long flags;

        if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
                *bad_wr = wr;
                err = -EINVAL;
                goto err1;
        }

        if (unlikely(qp->srq)) {
                *bad_wr = wr;
                err = -EINVAL;
                goto err1;
        }

        spin_lock_irqsave(&rq->producer_lock, flags);

        while (wr) {
                err = post_one_recv(rq, wr);
                if (unlikely(err)) {
                        *bad_wr = wr;
                        break;
                }
                wr = wr->next;
        }

        spin_unlock_irqrestore(&rq->producer_lock, flags);

        if (qp->resp.state == QP_STATE_ERROR)
                rxe_run_task(&qp->resp.task, 1);

err1:
        return err;
}

static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                         struct ib_udata *udata)
{
        int err;
        struct ib_device *dev = ibcq->device;
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_create_cq_resp __user *uresp = NULL;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
        }

        if (attr->flags)
                return -EOPNOTSUPP;

        err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
        if (err)
                return err;

        err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
                               uresp);
        if (err)
                return err;

        return rxe_add_to_pool(&rxe->cq_pool, cq);
}

static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
        struct rxe_cq *cq = to_rcq(ibcq);

        rxe_cq_disable(cq);

        rxe_drop_ref(cq);
        return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        int err;
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_dev *rxe = to_rdev(ibcq->device);
        struct rxe_resize_cq_resp __user *uresp = NULL;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
        }

        err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
        if (err)
                goto err1;

        err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
        if (err)
                goto err1;

        return 0;

err1:
        return err;
}

static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        int i;
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_cqe *cqe;
        unsigned long flags;

        spin_lock_irqsave(&cq->cq_lock, flags);
        for (i = 0; i < num_entries; i++) {
                if (cq->is_user)
                        cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER);
                else
                        cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
                if (!cqe)
                        break;

                memcpy(wc++, &cqe->ibwc, sizeof(*wc));
                if (cq->is_user)
                        advance_consumer(cq->queue, QUEUE_TYPE_TO_USER);
                else
                        advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
        }
        spin_unlock_irqrestore(&cq->cq_lock, flags);

        return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        int count;

        if (cq->is_user)
                count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
        else
                count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);

        return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        unsigned long irq_flags;
        int ret = 0;
        int empty;

        spin_lock_irqsave(&cq->cq_lock, irq_flags);
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = flags & IB_CQ_SOLICITED_MASK;

        if (cq->is_user)
                empty = queue_empty(cq->queue, QUEUE_TYPE_TO_USER);
        else
                empty = queue_empty(cq->queue, QUEUE_TYPE_KERNEL);

        if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
                ret = 1;

        spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

        return ret;
}

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mr *mr;

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        rxe_add_index(mr);
        rxe_add_ref(pd);
        rxe_mr_init_dma(pd, access, mr);

        return &mr->ibmr;
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
                                     u64 start,
                                     u64 length,
                                     u64 iova,
                                     int access, struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mr *mr;

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr) {
                err = -ENOMEM;
                goto err2;
        }

        rxe_add_index(mr);

        rxe_add_ref(pd);

        err = rxe_mr_init_user(pd, start, length, iova, access, mr);
        if (err)
                goto err3;

        return &mr->ibmr;

err3:
        rxe_drop_ref(pd);
        rxe_drop_index(mr);
        rxe_drop_ref(mr);
err2:
        return ERR_PTR(err);
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
                                  u32 max_num_sg)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mr *mr;
        int err;

        if (mr_type != IB_MR_TYPE_MEM_REG)
                return ERR_PTR(-EINVAL);

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr) {
                err = -ENOMEM;
                goto err1;
        }

        rxe_add_index(mr);

        rxe_add_ref(pd);

        err = rxe_mr_init_fast(pd, max_num_sg, mr);
        if (err)
                goto err2;

        return &mr->ibmr;

err2:
        rxe_drop_ref(pd);
        rxe_drop_index(mr);
        rxe_drop_ref(mr);
err1:
        return ERR_PTR(err);
}

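/* ib_sg_to_pages() callback: record one page address in the next free
 * slot of the MR's page map.
 */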
static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct rxe_mr *mr = to_rmr(ibmr);
        struct rxe_map *map;
        struct rxe_phys_buf *buf;

        if (unlikely(mr->nbuf == mr->num_buf))
                return -ENOMEM;

        map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
        buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

        buf->addr = addr;
        buf->size = ibmr->page_size;
        mr->nbuf++;

        return 0;
}

static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                         int sg_nents, unsigned int *sg_offset)
{
        struct rxe_mr *mr = to_rmr(ibmr);
        int n;

        mr->nbuf = 0;

        n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

        mr->va = ibmr->iova;
        mr->iova = ibmr->iova;
        mr->length = ibmr->length;
        mr->page_shift = ilog2(ibmr->page_size);
        mr->page_mask = ibmr->page_size - 1;
        mr->offset = mr->iova & mr->page_mask;

        return n;
}

static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_mc_grp *grp;

        /* takes a ref on grp if successful */
        err = rxe_mcast_get_grp(rxe, mgid, &grp);
        if (err)
                return err;

        err = rxe_mcast_add_grp_elem(rxe, qp, grp);

        rxe_drop_ref(grp);
        return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);

        return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t parent_show(struct device *device,
                           struct device_attribute *attr, char *buf)
{
        struct rxe_dev *rxe =
                rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);

        return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);

static struct attribute *rxe_dev_attributes[] = {
        &dev_attr_parent.attr,
        NULL
};

static const struct attribute_group rxe_attr_group = {
        .attrs = rxe_dev_attributes,
};

static int rxe_enable_driver(struct ib_device *ib_dev)
{
        struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);

        rxe_set_port_state(rxe);
        dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
        return 0;
}

static const struct ib_device_ops rxe_dev_ops = {
        .owner = THIS_MODULE,
        .driver_id = RDMA_DRIVER_RXE,
        .uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,

        .alloc_hw_port_stats = rxe_ib_alloc_hw_port_stats,
        .alloc_mr = rxe_alloc_mr,
        .alloc_mw = rxe_alloc_mw,
        .alloc_pd = rxe_alloc_pd,
        .alloc_ucontext = rxe_alloc_ucontext,
        .attach_mcast = rxe_attach_mcast,
        .create_ah = rxe_create_ah,
        .create_cq = rxe_create_cq,
        .create_qp = rxe_create_qp,
        .create_srq = rxe_create_srq,
        .create_user_ah = rxe_create_ah,
        .dealloc_driver = rxe_dealloc,
        .dealloc_mw = rxe_dealloc_mw,
        .dealloc_pd = rxe_dealloc_pd,
        .dealloc_ucontext = rxe_dealloc_ucontext,
        .dereg_mr = rxe_dereg_mr,
        .destroy_ah = rxe_destroy_ah,
        .destroy_cq = rxe_destroy_cq,
        .destroy_qp = rxe_destroy_qp,
        .destroy_srq = rxe_destroy_srq,
        .detach_mcast = rxe_detach_mcast,
        .device_group = &rxe_attr_group,
        .enable_driver = rxe_enable_driver,
        .get_dma_mr = rxe_get_dma_mr,
        .get_hw_stats = rxe_ib_get_hw_stats,
        .get_link_layer = rxe_get_link_layer,
        .get_port_immutable = rxe_port_immutable,
        .map_mr_sg = rxe_map_mr_sg,
        .mmap = rxe_mmap,
        .modify_ah = rxe_modify_ah,
        .modify_device = rxe_modify_device,
        .modify_port = rxe_modify_port,
        .modify_qp = rxe_modify_qp,
        .modify_srq = rxe_modify_srq,
        .peek_cq = rxe_peek_cq,
        .poll_cq = rxe_poll_cq,
        .post_recv = rxe_post_recv,
        .post_send = rxe_post_send,
        .post_srq_recv = rxe_post_srq_recv,
        .query_ah = rxe_query_ah,
        .query_device = rxe_query_device,
        .query_pkey = rxe_query_pkey,
        .query_port = rxe_query_port,
        .query_qp = rxe_query_qp,
        .query_srq = rxe_query_srq,
        .reg_user_mr = rxe_reg_user_mr,
        .req_notify_cq = rxe_req_notify_cq,
        .resize_cq = rxe_resize_cq,

        INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
        INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
        INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
        INIT_RDMA_OBJ_SIZE(ib_qp, rxe_qp, ibqp),
        INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
        INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
        INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
};

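/* Populate the ib_device for a new rxe device (node GUID derived from the
 * net_device MAC via addrconf_addr_eui48()) and register it with the RDMA
 * core.
 */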
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
{
        int err;
        struct ib_device *dev = &rxe->ib_dev;

        strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

        dev->node_type = RDMA_NODE_IB_CA;
        dev->phys_port_cnt = 1;
        dev->num_comp_vectors = num_possible_cpus();
        dev->local_dma_lkey = 0;
        addrconf_addr_eui48((unsigned char *)&dev->node_guid,
                            rxe->ndev->dev_addr);

        dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
                                BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);

        ib_set_device_ops(dev, &rxe_dev_ops);
        err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
        if (err)
                return err;

        err = rxe_icrc_init(rxe);
        if (err)
                return err;

        err = ib_register_device(dev, ibdev_name, NULL);
        if (err)
                pr_warn("%s failed with error %d\n", __func__, err);

        /*
         * Note that rxe may be invalid at this point if another thread
         * unregistered it.
         */
        return err;
}