linux/drivers/infiniband/sw/rxe/rxe_verbs.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include <rdma/uverbs_ioctl.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

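/* Return the device attributes that were set up when the rxe device
 * was created. No driver-private command data is used, so any udata
 * payload is rejected.
 */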
static int rxe_query_device(struct ib_device *dev,
                            struct ib_device_attr *attr,
                            struct ib_udata *uhw)
{
        struct rxe_dev *rxe = to_rdev(dev);

        if (uhw->inlen || uhw->outlen)
                return -EINVAL;

        *attr = rxe->attr;
        return 0;
}

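/* Report port attributes from the cached rxe_port state. Link speed
 * and width are queried from the underlying Ethernet device, and the
 * physical state is derived from the netdev flags, both under
 * usdev_lock.
 */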
static int rxe_query_port(struct ib_device *dev,
                          u32 port_num, struct ib_port_attr *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_port *port;
        int rc;

        port = &rxe->port;

        /* *attr is zeroed by the caller, avoid zeroing it again here */
        *attr = port->attr;

        mutex_lock(&rxe->usdev_lock);
        rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
                              &attr->active_width);

        if (attr->state == IB_PORT_ACTIVE)
                attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
        else if (dev_get_flags(rxe->ndev) & IFF_UP)
                attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
        else
                attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;

        mutex_unlock(&rxe->usdev_lock);

        return rc;
}

static int rxe_query_pkey(struct ib_device *device,
                          u32 port_num, u16 index, u16 *pkey)
{
        if (index > 0)
                return -EINVAL;

        *pkey = IB_DEFAULT_PKEY_FULL;
        return 0;
}

static int rxe_modify_device(struct ib_device *dev,
                             int mask, struct ib_device_modify *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);

        if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
                     IB_DEVICE_MODIFY_NODE_DESC))
                return -EOPNOTSUPP;

        if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
                rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
                memcpy(rxe->ib_dev.node_desc,
                       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
        }

        return 0;
}

static int rxe_modify_port(struct ib_device *dev,
                           u32 port_num, int mask, struct ib_port_modify *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_port *port;

        port = &rxe->port;

        port->attr.port_cap_flags |= attr->set_port_cap_mask;
        port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

        if (mask & IB_PORT_RESET_QKEY_CNTR)
                port->attr.qkey_viol_cntr = 0;

        return 0;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
                                               u32 port_num)
{
        return IB_LINK_LAYER_ETHERNET;
}

static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(ibuc->device);
        struct rxe_ucontext *uc = to_ruc(ibuc);

        return rxe_add_to_pool(&rxe->uc_pool, uc);
}

static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
        struct rxe_ucontext *uc = to_ruc(ibuc);

        rxe_drop_ref(uc);
}

static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
                              struct ib_port_immutable *immutable)
{
        int err;
        struct ib_port_attr attr;

        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

        err = ib_query_port(dev, port_num, &attr);
        if (err)
                return err;

        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;

        return 0;
}

static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);

        return rxe_add_to_pool(&rxe->pd_pool, pd);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
        struct rxe_pd *pd = to_rpd(ibpd);

        rxe_drop_ref(pd);
        return 0;
}

static int rxe_create_ah(struct ib_ah *ibah,
                         struct rdma_ah_init_attr *init_attr,
                         struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibah->device);
        struct rxe_ah *ah = to_rah(ibah);

        err = rxe_av_chk_attr(rxe, init_attr->ah_attr);
        if (err)
                return err;

        err = rxe_add_to_pool(&rxe->ah_pool, ah);
        if (err)
                return err;

        rxe_init_av(init_attr->ah_attr, &ah->av);
        return 0;
}

static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibah->device);
        struct rxe_ah *ah = to_rah(ibah);

        err = rxe_av_chk_attr(rxe, attr);
        if (err)
                return err;

        rxe_init_av(attr, &ah->av);
        return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
        struct rxe_ah *ah = to_rah(ibah);

        memset(attr, 0, sizeof(*attr));
        attr->type = ibah->type;
        rxe_av_to_attr(&ah->av, attr);
        return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
        struct rxe_ah *ah = to_rah(ibah);

        rxe_drop_ref(ah);
        return 0;
}

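/* Add one receive work request to a receive or shared receive queue.
 * The caller holds the queue's producer lock. Returns -ENOMEM if the
 * queue is full and -EINVAL if the request has more SGEs than the
 * queue supports.
 */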
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
        int err;
        int i;
        u32 length;
        struct rxe_recv_wqe *recv_wqe;
        int num_sge = ibwr->num_sge;
        int full;

        if (rq->is_user)
                full = queue_full(rq->queue, QUEUE_TYPE_FROM_USER);
        else
                full = queue_full(rq->queue, QUEUE_TYPE_KERNEL);

        if (unlikely(full)) {
                err = -ENOMEM;
                goto err1;
        }

        if (unlikely(num_sge > rq->max_sge)) {
                err = -EINVAL;
                goto err1;
        }

        length = 0;
        for (i = 0; i < num_sge; i++)
                length += ibwr->sg_list[i].length;

        if (rq->is_user)
                recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_FROM_USER);
        else
                recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_KERNEL);

        recv_wqe->wr_id = ibwr->wr_id;
        recv_wqe->num_sge = num_sge;

        memcpy(recv_wqe->dma.sge, ibwr->sg_list,
               num_sge * sizeof(struct ib_sge));

        recv_wqe->dma.length            = length;
        recv_wqe->dma.resid             = length;
        recv_wqe->dma.num_sge           = num_sge;
        recv_wqe->dma.cur_sge           = 0;
        recv_wqe->dma.sge_offset        = 0;

        if (rq->is_user)
                advance_producer(rq->queue, QUEUE_TYPE_FROM_USER);
        else
                advance_producer(rq->queue, QUEUE_TYPE_KERNEL);

        return 0;

err1:
        return err;
}

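/* Create a shared receive queue. Only IB_SRQT_BASIC is supported;
 * userspace callers get the queue mapping information back through
 * the rxe_create_srq_resp structure in udata->outbuf.
 */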
static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
                          struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibsrq->device);
        struct rxe_pd *pd = to_rpd(ibsrq->pd);
        struct rxe_srq *srq = to_rsrq(ibsrq);
        struct rxe_create_srq_resp __user *uresp = NULL;

        if (init->srq_type != IB_SRQT_BASIC)
                return -EOPNOTSUPP;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
                srq->is_user = true;
        } else {
                srq->is_user = false;
        }

        err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
        if (err)
                goto err1;

        err = rxe_add_to_pool(&rxe->srq_pool, srq);
        if (err)
                goto err1;

        rxe_add_ref(pd);
        srq->pd = pd;

        err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
        if (err)
                goto err2;

        return 0;

err2:
        rxe_drop_ref(pd);
        rxe_drop_ref(srq);
err1:
        return err;
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                          enum ib_srq_attr_mask mask,
                          struct ib_udata *udata)
{
        int err;
        struct rxe_srq *srq = to_rsrq(ibsrq);
        struct rxe_dev *rxe = to_rdev(ibsrq->device);
        struct rxe_modify_srq_cmd ucmd = {};

        if (udata) {
                if (udata->inlen < sizeof(ucmd))
                        return -EINVAL;

                err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
                if (err)
                        return err;
        }

        err = rxe_srq_chk_attr(rxe, srq, attr, mask);
        if (err)
                goto err1;

        err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
        if (err)
                goto err1;

        return 0;

err1:
        return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);

        if (srq->error)
                return -EINVAL;

        attr->max_wr = srq->rq.queue->buf->index_mask;
        attr->max_sge = srq->rq.max_sge;
        attr->srq_limit = srq->limit;
        return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);

        if (srq->rq.queue)
                rxe_queue_cleanup(srq->rq.queue);

        rxe_drop_ref(srq->pd);
        rxe_drop_ref(srq);
        return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
                             const struct ib_recv_wr **bad_wr)
{
        int err = 0;
        unsigned long flags;
        struct rxe_srq *srq = to_rsrq(ibsrq);

        spin_lock_irqsave(&srq->rq.producer_lock, flags);

        while (wr) {
                err = post_one_recv(&srq->rq, wr);
                if (unlikely(err))
                        break;
                wr = wr->next;
        }

        spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

        if (err)
                *bad_wr = wr;

        return err;
}

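/* Allocate and initialize a queue pair. create_flags are not
 * supported and no driver-private input is accepted; userspace
 * callers receive the queue details in rxe_create_qp_resp.
 */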
static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
                                   struct ib_qp_init_attr *init,
                                   struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_qp *qp;
        struct rxe_create_qp_resp __user *uresp = NULL;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return ERR_PTR(-EINVAL);
                uresp = udata->outbuf;
        }

        if (init->create_flags)
                return ERR_PTR(-EOPNOTSUPP);

        err = rxe_qp_chk_init(rxe, init);
        if (err)
                goto err1;

        qp = rxe_alloc(&rxe->qp_pool);
        if (!qp) {
                err = -ENOMEM;
                goto err1;
        }

        if (udata) {
                if (udata->inlen) {
                        err = -EINVAL;
                        goto err2;
                }
                qp->is_user = true;
        } else {
                qp->is_user = false;
        }

        rxe_add_index(qp);

        err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata);
        if (err)
                goto err3;

        return &qp->ibqp;

err3:
        rxe_drop_index(qp);
err2:
        rxe_drop_ref(qp);
err1:
        return ERR_PTR(err);
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                         int mask, struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);

        if (mask & ~IB_QP_ATTR_STANDARD_BITS)
                return -EOPNOTSUPP;

        err = rxe_qp_chk_attr(rxe, qp, attr, mask);
        if (err)
                goto err1;

        err = rxe_qp_from_attr(qp, attr, mask, udata);
        if (err)
                goto err1;

        return 0;

err1:
        return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                        int mask, struct ib_qp_init_attr *init)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        rxe_qp_to_init(qp, init);
        rxe_qp_to_attr(qp, attr, mask);

        return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        rxe_qp_destroy(qp);
        rxe_drop_index(qp);
        rxe_drop_ref(qp);
        return 0;
}

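/* Check a send work request against the send queue limits: SGE
 * count, inline data length and, for atomic operations, an 8-byte
 * aligned remote address and at least 8 bytes of payload.
 */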
static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
                            unsigned int mask, unsigned int length)
{
        int num_sge = ibwr->num_sge;
        struct rxe_sq *sq = &qp->sq;

        if (unlikely(num_sge > sq->max_sge))
                goto err1;

        if (unlikely(mask & WR_ATOMIC_MASK)) {
                if (length < 8)
                        goto err1;

                if (atomic_wr(ibwr)->remote_addr & 0x7)
                        goto err1;
        }

        if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
                     (length > sq->max_inline)))
                goto err1;

        return 0;

err1:
        return -EINVAL;
}

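/* Copy the opcode-specific fields of an ib_send_wr into the rxe
 * representation: UD destination QPN/Q_Key, RDMA remote address and
 * rkey, atomic arguments, immediate data, invalidate rkey or
 * fast-register MR parameters, depending on the opcode.
 */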
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
                         const struct ib_send_wr *ibwr)
{
        wr->wr_id = ibwr->wr_id;
        wr->num_sge = ibwr->num_sge;
        wr->opcode = ibwr->opcode;
        wr->send_flags = ibwr->send_flags;

        if (qp_type(qp) == IB_QPT_UD ||
            qp_type(qp) == IB_QPT_SMI ||
            qp_type(qp) == IB_QPT_GSI) {
                wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
                wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
                if (qp_type(qp) == IB_QPT_GSI)
                        wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
                if (wr->opcode == IB_WR_SEND_WITH_IMM)
                        wr->ex.imm_data = ibwr->ex.imm_data;
        } else {
                switch (wr->opcode) {
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        wr->ex.imm_data = ibwr->ex.imm_data;
                        fallthrough;
                case IB_WR_RDMA_READ:
                case IB_WR_RDMA_WRITE:
                        wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
                        wr->wr.rdma.rkey        = rdma_wr(ibwr)->rkey;
                        break;
                case IB_WR_SEND_WITH_IMM:
                        wr->ex.imm_data = ibwr->ex.imm_data;
                        break;
                case IB_WR_SEND_WITH_INV:
                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
                        break;
                case IB_WR_ATOMIC_CMP_AND_SWP:
                case IB_WR_ATOMIC_FETCH_AND_ADD:
                        wr->wr.atomic.remote_addr =
                                atomic_wr(ibwr)->remote_addr;
                        wr->wr.atomic.compare_add =
                                atomic_wr(ibwr)->compare_add;
                        wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
                        wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
                        break;
                case IB_WR_LOCAL_INV:
                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
                        break;
                case IB_WR_REG_MR:
                        wr->wr.reg.mr = reg_wr(ibwr)->mr;
                        wr->wr.reg.key = reg_wr(ibwr)->key;
                        wr->wr.reg.access = reg_wr(ibwr)->access;
                        break;
                default:
                        break;
                }
        }
}

static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
                                    const struct ib_send_wr *ibwr)
{
        struct ib_sge *sge = ibwr->sg_list;
        u8 *p = wqe->dma.inline_data;
        int i;

        for (i = 0; i < ibwr->num_sge; i++, sge++) {
                memcpy(p, (void *)(uintptr_t)sge->addr, sge->length);
                p += sge->length;
        }
}

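/* Build a send WQE from a validated work request. Local operations
 * carry no payload and are simply marked posted; otherwise the AH
 * (for datagram QPs), the SGE list or inline data, and the DMA state
 * are filled in and a new send sequence number is assigned.
 */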
static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
                         unsigned int mask, unsigned int length,
                         struct rxe_send_wqe *wqe)
{
        int num_sge = ibwr->num_sge;

        init_send_wr(qp, &wqe->wr, ibwr);

        /* local operation */
        if (unlikely(mask & WR_LOCAL_OP_MASK)) {
                wqe->mask = mask;
                wqe->state = wqe_state_posted;
                return;
        }

        if (qp_type(qp) == IB_QPT_UD ||
            qp_type(qp) == IB_QPT_SMI ||
            qp_type(qp) == IB_QPT_GSI)
                memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

        if (unlikely(ibwr->send_flags & IB_SEND_INLINE))
                copy_inline_data_to_wqe(wqe, ibwr);
        else
                memcpy(wqe->dma.sge, ibwr->sg_list,
                       num_sge * sizeof(struct ib_sge));

        wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
                mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
        wqe->mask               = mask;
        wqe->dma.length         = length;
        wqe->dma.resid          = length;
        wqe->dma.num_sge        = num_sge;
        wqe->dma.cur_sge        = 0;
        wqe->dma.sge_offset     = 0;
        wqe->state              = wqe_state_posted;
        wqe->ssn                = atomic_add_return(1, &qp->ssn);
}

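/* Validate one send work request and, under the send queue lock,
 * write it into the next free send WQE and advance the producer
 * index. Returns -ENOMEM if the send queue is full.
 */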
static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
                         unsigned int mask, u32 length)
{
        int err;
        struct rxe_sq *sq = &qp->sq;
        struct rxe_send_wqe *send_wqe;
        unsigned long flags;
        int full;

        err = validate_send_wr(qp, ibwr, mask, length);
        if (err)
                return err;

        spin_lock_irqsave(&qp->sq.sq_lock, flags);

        if (qp->is_user)
                full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER);
        else
                full = queue_full(sq->queue, QUEUE_TYPE_KERNEL);

        if (unlikely(full)) {
                spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
                return -ENOMEM;
        }

        if (qp->is_user)
                send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER);
        else
                send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);

        init_send_wqe(qp, ibwr, mask, length, send_wqe);

        if (qp->is_user)
                advance_producer(sq->queue, QUEUE_TYPE_FROM_USER);
        else
                advance_producer(sq->queue, QUEUE_TYPE_KERNEL);

        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

        return 0;
}

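/* Post a chain of send work requests for a kernel QP. Posting stops
 * at the first bad WR, which is returned through bad_wr. The
 * requester task is always kicked afterwards, and the completer task
 * as well if the QP has moved to the error state.
 */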
static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
                                const struct ib_send_wr **bad_wr)
{
        int err = 0;
        unsigned int mask;
        unsigned int length = 0;
        int i;
        struct ib_send_wr *next;

        while (wr) {
                mask = wr_opcode_mask(wr->opcode, qp);
                if (unlikely(!mask)) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
                             !(mask & WR_INLINE_MASK))) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                next = wr->next;

                length = 0;
                for (i = 0; i < wr->num_sge; i++)
                        length += wr->sg_list[i].length;

                err = post_one_send(qp, wr, mask, length);

                if (err) {
                        *bad_wr = wr;
                        break;
                }
                wr = next;
        }

        rxe_run_task(&qp->req.task, 1);
        if (unlikely(qp->req.state == QP_STATE_ERROR))
                rxe_run_task(&qp->comp.task, 1);

        return err;
}

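/* post_send verb entry point. For QPs owned by userspace the WQEs
 * are expected to be in the shared send queue already, so only the
 * requester task needs to run (in process context); kernel QPs are
 * handled by rxe_post_send_kernel().
 */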
static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                         const struct ib_send_wr **bad_wr)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        if (unlikely(!qp->valid)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (unlikely(qp->req.state < QP_STATE_READY)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (qp->is_user) {
                /* Utilize process context to do protocol processing */
                rxe_run_task(&qp->req.task, 0);
                return 0;
        } else
                return rxe_post_send_kernel(qp, wr, bad_wr);
}

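/* post_recv verb entry point. QPs that use an SRQ or have not
 * reached the INIT state are rejected; otherwise each WR is queued
 * with post_one_recv() under the receive queue's producer lock.
 */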
static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
                         const struct ib_recv_wr **bad_wr)
{
        int err = 0;
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_rq *rq = &qp->rq;
        unsigned long flags;

        if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
                *bad_wr = wr;
                err = -EINVAL;
                goto err1;
        }

        if (unlikely(qp->srq)) {
                *bad_wr = wr;
                err = -EINVAL;
                goto err1;
        }

        spin_lock_irqsave(&rq->producer_lock, flags);

        while (wr) {
                err = post_one_recv(rq, wr);
                if (unlikely(err)) {
                        *bad_wr = wr;
                        break;
                }
                wr = wr->next;
        }

        spin_unlock_irqrestore(&rq->producer_lock, flags);

        if (qp->resp.state == QP_STATE_ERROR)
                rxe_run_task(&qp->resp.task, 1);

err1:
        return err;
}

static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                         struct ib_udata *udata)
{
        int err;
        struct ib_device *dev = ibcq->device;
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_create_cq_resp __user *uresp = NULL;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
        }

        if (attr->flags)
                return -EOPNOTSUPP;

        err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
        if (err)
                return err;

        err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
                               uresp);
        if (err)
                return err;

        return rxe_add_to_pool(&rxe->cq_pool, cq);
}

static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
        struct rxe_cq *cq = to_rcq(ibcq);

        rxe_cq_disable(cq);

        rxe_drop_ref(cq);
        return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        int err;
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_dev *rxe = to_rdev(ibcq->device);
        struct rxe_resize_cq_resp __user *uresp = NULL;

        if (udata) {
                if (udata->outlen < sizeof(*uresp))
                        return -EINVAL;
                uresp = udata->outbuf;
        }

        err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
        if (err)
                goto err1;

        err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
        if (err)
                goto err1;

        return 0;

err1:
        return err;
}

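/* Reap up to num_entries completions from the completion queue into
 * the caller's work completion array and return the number copied.
 */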
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        int i;
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_cqe *cqe;
        unsigned long flags;

        spin_lock_irqsave(&cq->cq_lock, flags);
        for (i = 0; i < num_entries; i++) {
                if (cq->is_user)
                        cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER);
                else
                        cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
                if (!cqe)
                        break;

                memcpy(wc++, &cqe->ibwc, sizeof(*wc));
                if (cq->is_user)
                        advance_consumer(cq->queue, QUEUE_TYPE_TO_USER);
                else
                        advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
        }
        spin_unlock_irqrestore(&cq->cq_lock, flags);

        return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        int count;

        if (cq->is_user)
                count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
        else
                count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);

        return (count > wc_cnt) ? wc_cnt : count;
}

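/* Arm the completion queue for the requested notification type.
 * When IB_CQ_REPORT_MISSED_EVENTS is set, return 1 if completions
 * are already queued so the caller knows to poll again.
 */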
static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        unsigned long irq_flags;
        int ret = 0;
        int empty;

        spin_lock_irqsave(&cq->cq_lock, irq_flags);
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = flags & IB_CQ_SOLICITED_MASK;

        if (cq->is_user)
                empty = queue_empty(cq->queue, QUEUE_TYPE_TO_USER);
        else
                empty = queue_empty(cq->queue, QUEUE_TYPE_KERNEL);

        if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
                ret = 1;

        spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

        return ret;
}

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mr *mr;

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        rxe_add_index(mr);
        rxe_add_ref(pd);
        rxe_mr_init_dma(pd, access, mr);

        return &mr->ibmr;
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
                                     u64 start,
                                     u64 length,
                                     u64 iova,
                                     int access, struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mr *mr;

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr) {
                err = -ENOMEM;
                goto err2;
        }

        rxe_add_index(mr);

        rxe_add_ref(pd);

        err = rxe_mr_init_user(pd, start, length, iova, access, mr);
        if (err)
                goto err3;

        return &mr->ibmr;

err3:
        rxe_drop_ref(pd);
        rxe_drop_index(mr);
        rxe_drop_ref(mr);
err2:
        return ERR_PTR(err);
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
                                  u32 max_num_sg)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mr *mr;
        int err;

        if (mr_type != IB_MR_TYPE_MEM_REG)
                return ERR_PTR(-EINVAL);

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr) {
                err = -ENOMEM;
                goto err1;
        }

        rxe_add_index(mr);

        rxe_add_ref(pd);

        err = rxe_mr_init_fast(pd, max_num_sg, mr);
        if (err)
                goto err2;

        return &mr->ibmr;

err2:
        rxe_drop_ref(pd);
        rxe_drop_index(mr);
        rxe_drop_ref(mr);
err1:
        return ERR_PTR(err);
}

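/* Callback passed to ib_sg_to_pages() by rxe_map_mr_sg() below:
 * record one page-sized buffer in the MR's map table.
 */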
static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct rxe_mr *mr = to_rmr(ibmr);
        struct rxe_map *map;
        struct rxe_phys_buf *buf;

        if (unlikely(mr->nbuf == mr->num_buf))
                return -ENOMEM;

        map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
        buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

        buf->addr = addr;
        buf->size = ibmr->page_size;
        mr->nbuf++;

        return 0;
}

static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                         int sg_nents, unsigned int *sg_offset)
{
        struct rxe_mr *mr = to_rmr(ibmr);
        int n;

        mr->nbuf = 0;

        n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

        mr->va = ibmr->iova;
        mr->iova = ibmr->iova;
        mr->length = ibmr->length;
        mr->page_shift = ilog2(ibmr->page_size);
        mr->page_mask = ibmr->page_size - 1;
        mr->offset = mr->iova & mr->page_mask;

        return n;
}

static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_mc_grp *grp;

        /* takes a ref on grp if successful */
        err = rxe_mcast_get_grp(rxe, mgid, &grp);
        if (err)
                return err;

        err = rxe_mcast_add_grp_elem(rxe, qp, grp);

        rxe_drop_ref(grp);
        return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);

        return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t parent_show(struct device *device,
                           struct device_attribute *attr, char *buf)
{
        struct rxe_dev *rxe =
                rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);

        return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);

static struct attribute *rxe_dev_attributes[] = {
        &dev_attr_parent.attr,
        NULL
};

static const struct attribute_group rxe_attr_group = {
        .attrs = rxe_dev_attributes,
};

static int rxe_enable_driver(struct ib_device *ib_dev)
{
        struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);

        rxe_set_port_state(rxe);
        dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
        return 0;
}

static const struct ib_device_ops rxe_dev_ops = {
        .owner = THIS_MODULE,
        .driver_id = RDMA_DRIVER_RXE,
        .uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,

        .alloc_hw_port_stats = rxe_ib_alloc_hw_port_stats,
        .alloc_mr = rxe_alloc_mr,
        .alloc_mw = rxe_alloc_mw,
        .alloc_pd = rxe_alloc_pd,
        .alloc_ucontext = rxe_alloc_ucontext,
        .attach_mcast = rxe_attach_mcast,
        .create_ah = rxe_create_ah,
        .create_cq = rxe_create_cq,
        .create_qp = rxe_create_qp,
        .create_srq = rxe_create_srq,
        .create_user_ah = rxe_create_ah,
        .dealloc_driver = rxe_dealloc,
        .dealloc_mw = rxe_dealloc_mw,
        .dealloc_pd = rxe_dealloc_pd,
        .dealloc_ucontext = rxe_dealloc_ucontext,
        .dereg_mr = rxe_dereg_mr,
        .destroy_ah = rxe_destroy_ah,
        .destroy_cq = rxe_destroy_cq,
        .destroy_qp = rxe_destroy_qp,
        .destroy_srq = rxe_destroy_srq,
        .detach_mcast = rxe_detach_mcast,
        .device_group = &rxe_attr_group,
        .enable_driver = rxe_enable_driver,
        .get_dma_mr = rxe_get_dma_mr,
        .get_hw_stats = rxe_ib_get_hw_stats,
        .get_link_layer = rxe_get_link_layer,
        .get_port_immutable = rxe_port_immutable,
        .map_mr_sg = rxe_map_mr_sg,
        .mmap = rxe_mmap,
        .modify_ah = rxe_modify_ah,
        .modify_device = rxe_modify_device,
        .modify_port = rxe_modify_port,
        .modify_qp = rxe_modify_qp,
        .modify_srq = rxe_modify_srq,
        .peek_cq = rxe_peek_cq,
        .poll_cq = rxe_poll_cq,
        .post_recv = rxe_post_recv,
        .post_send = rxe_post_send,
        .post_srq_recv = rxe_post_srq_recv,
        .query_ah = rxe_query_ah,
        .query_device = rxe_query_device,
        .query_pkey = rxe_query_pkey,
        .query_port = rxe_query_port,
        .query_qp = rxe_query_qp,
        .query_srq = rxe_query_srq,
        .reg_user_mr = rxe_reg_user_mr,
        .req_notify_cq = rxe_req_notify_cq,
        .resize_cq = rxe_resize_cq,

        INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
        INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
        INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
        INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
        INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
        INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
};

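/* Register the rxe device with the RDMA core: fill in the ib_device
 * fields, derive the node GUID from the netdev MAC address, bind the
 * netdev to port 1 and allocate the crc32 shash (used by rxe for
 * ICRC computation) before calling ib_register_device().
 */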
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
{
        int err;
        struct ib_device *dev = &rxe->ib_dev;
        struct crypto_shash *tfm;

        strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

        dev->node_type = RDMA_NODE_IB_CA;
        dev->phys_port_cnt = 1;
        dev->num_comp_vectors = num_possible_cpus();
        dev->local_dma_lkey = 0;
        addrconf_addr_eui48((unsigned char *)&dev->node_guid,
                            rxe->ndev->dev_addr);

        dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
                                BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);

        ib_set_device_ops(dev, &rxe_dev_ops);
        err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
        if (err)
                return err;

        tfm = crypto_alloc_shash("crc32", 0, 0);
        if (IS_ERR(tfm)) {
                pr_err("failed to allocate crc algorithm err:%ld\n",
                       PTR_ERR(tfm));
                return PTR_ERR(tfm);
        }
        rxe->tfm = tfm;

        err = ib_register_device(dev, ibdev_name, NULL);
        if (err)
                pr_warn("%s failed with error %d\n", __func__, err);

        /*
         * Note that rxe may be invalid at this point if another thread
         * unregistered it.
         */
        return err;
}