linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>

#include "pvrdma.h"

static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
                                struct pvrdma_qp *qp);

static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
                           struct pvrdma_cq **recv_cq)
{
        *send_cq = to_vcq(qp->ibqp.send_cq);
        *recv_cq = to_vcq(qp->ibqp.recv_cq);
}

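/*
 * pvrdma_lock_cqs()/pvrdma_unlock_cqs() take the send and receive CQ locks
 * in a fixed order (lower cq_handle first) so that two QPs sharing the same
 * pair of CQs cannot deadlock by acquiring them in opposite orders. When
 * both work queues use the same CQ only one lock is taken, and the sparse
 * annotations are balanced with __acquire()/__release().
 */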
static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
                            unsigned long *scq_flags,
                            unsigned long *rcq_flags)
        __acquires(scq->cq_lock) __acquires(rcq->cq_lock)
{
        if (scq == rcq) {
                spin_lock_irqsave(&scq->cq_lock, *scq_flags);
                __acquire(rcq->cq_lock);
        } else if (scq->cq_handle < rcq->cq_handle) {
                spin_lock_irqsave(&scq->cq_lock, *scq_flags);
                spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags,
                                         SINGLE_DEPTH_NESTING);
        } else {
                spin_lock_irqsave(&rcq->cq_lock, *rcq_flags);
                spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags,
                                         SINGLE_DEPTH_NESTING);
        }
}

static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
                              unsigned long *scq_flags,
                              unsigned long *rcq_flags)
        __releases(scq->cq_lock) __releases(rcq->cq_lock)
{
        if (scq == rcq) {
                __release(rcq->cq_lock);
                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
        } else if (scq->cq_handle < rcq->cq_handle) {
                spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
        } else {
                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
                spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
        }
}

static void pvrdma_reset_qp(struct pvrdma_qp *qp)
{
        struct pvrdma_cq *scq, *rcq;
        unsigned long scq_flags, rcq_flags;

        /* Clean up cqes */
        get_cqs(qp, &scq, &rcq);
        pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        _pvrdma_flush_cqe(qp, scq);
        if (scq != rcq)
                _pvrdma_flush_cqe(qp, rcq);

        pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        /*
         * Reset queuepair. The checks are because usermode queuepairs won't
         * have kernel ringstates.
         */
        if (qp->rq.ring) {
                atomic_set(&qp->rq.ring->cons_head, 0);
                atomic_set(&qp->rq.ring->prod_tail, 0);
        }
        if (qp->sq.ring) {
                atomic_set(&qp->sq.ring->cons_head, 0);
                atomic_set(&qp->sq.ring->prod_tail, 0);
        }
}

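/*
 * Queue sizing: the two helpers below round the requested WR and SGE counts
 * up to powers of two, write the effective values back into @req_cap, and
 * derive how many pages the queue needs. As an illustration (the struct
 * sizes used here are hypothetical): a request for 100 RQ WRs of 3 SGEs
 * each becomes wqe_cnt = 128 and max_sg = 4; with a 16-byte WQE header and
 * 16-byte SGEs the WQE size rounds up to 128 bytes, so the receive queue
 * needs (128 * 128 + PAGE_SIZE - 1) / PAGE_SIZE = 4 pages on a 4 KiB page
 * system.
 */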
static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
                              struct ib_qp_cap *req_cap,
                              struct pvrdma_qp *qp)
{
        if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr ||
            req_cap->max_recv_sge > dev->dsr->caps.max_sge) {
                dev_warn(&dev->pdev->dev, "recv queue size invalid\n");
                return -EINVAL;
        }

        qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr));
        qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge));

        /* Write back */
        req_cap->max_recv_wr = qp->rq.wqe_cnt;
        req_cap->max_recv_sge = qp->rq.max_sg;

        qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) +
                                             sizeof(struct pvrdma_sge) *
                                             qp->rq.max_sg);
        qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) /
                          PAGE_SIZE;

        return 0;
}

static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
                              struct pvrdma_qp *qp)
{
        if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
            req_cap->max_send_sge > dev->dsr->caps.max_sge) {
                dev_warn(&dev->pdev->dev, "send queue size invalid\n");
                return -EINVAL;
        }

        qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr));
        qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge));

        /* Write back */
        req_cap->max_send_wr = qp->sq.wqe_cnt;
        req_cap->max_send_sge = qp->sq.max_sg;

        qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) +
                                             sizeof(struct pvrdma_sge) *
                                             qp->sq.max_sg);
        /* Note: one extra page for the header. */
        qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES +
                          (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) /
                                                                PAGE_SIZE;

        return 0;
}

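/*
 * For kernel QPs the page directory is laid out as follows: page 0 carries
 * the ring state (the send ring followed immediately by the receive ring),
 * send WQEs start at sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE,
 * and receive WQEs follow the send pages at rq.offset. For user QPs the
 * directory is instead populated from the umem regions supplied by
 * userspace in struct pvrdma_create_qp.
 */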
/**
 * pvrdma_create_qp - create queue pair
 * @pd: protection domain
 * @init_attr: queue pair attributes
 * @udata: user data
 *
 * @return: the ib_qp pointer on success, otherwise returns an errno.
 */
struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
                               struct ib_qp_init_attr *init_attr,
                               struct ib_udata *udata)
{
        struct pvrdma_qp *qp = NULL;
        struct pvrdma_dev *dev = to_vdev(pd->device);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
        struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
        struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2;
        struct pvrdma_create_qp ucmd;
        struct pvrdma_create_qp_resp qp_resp = {};
        unsigned long flags;
        int ret;
        bool is_srq = !!init_attr->srq;

        if (init_attr->create_flags) {
                dev_warn(&dev->pdev->dev,
                         "invalid create queuepair flags %#x\n",
                         init_attr->create_flags);
                return ERR_PTR(-EINVAL);
        }

        if (init_attr->qp_type != IB_QPT_RC &&
            init_attr->qp_type != IB_QPT_UD &&
            init_attr->qp_type != IB_QPT_GSI) {
                dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
                         init_attr->qp_type);
                return ERR_PTR(-EOPNOTSUPP);
        }

        if (is_srq && !dev->dsr->caps.max_srq) {
                dev_warn(&dev->pdev->dev,
                         "SRQs not supported by device\n");
                return ERR_PTR(-EINVAL);
        }

        if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
                return ERR_PTR(-ENOMEM);

        switch (init_attr->qp_type) {
        case IB_QPT_GSI:
                if (init_attr->port_num == 0 ||
                    init_attr->port_num > pd->device->phys_port_cnt ||
                    udata) {
                        dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
                        ret = -EINVAL;
                        goto err_qp;
                }
                fallthrough;
        case IB_QPT_RC:
        case IB_QPT_UD:
                qp = kzalloc(sizeof(*qp), GFP_KERNEL);
                if (!qp) {
                        ret = -ENOMEM;
                        goto err_qp;
                }

                spin_lock_init(&qp->sq.lock);
                spin_lock_init(&qp->rq.lock);
                mutex_init(&qp->mutex);
                refcount_set(&qp->refcnt, 1);
                init_completion(&qp->free);

                qp->state = IB_QPS_RESET;
                qp->is_kernel = !udata;

                if (!qp->is_kernel) {
                        dev_dbg(&dev->pdev->dev,
                                "create queuepair from user space\n");

                        if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
                                ret = -EFAULT;
                                goto err_qp;
                        }

                        /* Userspace supports qpn and qp handles? */
                        if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION &&
                            udata->outlen < sizeof(qp_resp)) {
                                dev_warn(&dev->pdev->dev,
                                         "create queuepair not supported\n");
                                ret = -EOPNOTSUPP;
                                goto err_qp;
                        }

                        if (!is_srq) {
                                /* Map the receive queue buffer provided by
                                 * userspace.
                                 */
                                qp->rumem =
                                        ib_umem_get(pd->device, ucmd.rbuf_addr,
                                                    ucmd.rbuf_size, 0);
                                if (IS_ERR(qp->rumem)) {
                                        ret = PTR_ERR(qp->rumem);
                                        goto err_qp;
                                }
                                qp->srq = NULL;
                        } else {
                                qp->rumem = NULL;
                                qp->srq = to_vsrq(init_attr->srq);
                        }

                        qp->sumem = ib_umem_get(pd->device, ucmd.sbuf_addr,
                                                ucmd.sbuf_size, 0);
                        if (IS_ERR(qp->sumem)) {
                                if (!is_srq)
                                        ib_umem_release(qp->rumem);
                                ret = PTR_ERR(qp->sumem);
                                goto err_qp;
                        }

                        qp->npages_send = ib_umem_page_count(qp->sumem);
                        if (!is_srq)
                                qp->npages_recv = ib_umem_page_count(qp->rumem);
                        else
                                qp->npages_recv = 0;
                        qp->npages = qp->npages_send + qp->npages_recv;
                } else {
                        ret = pvrdma_set_sq_size(to_vdev(pd->device),
                                                 &init_attr->cap, qp);
                        if (ret)
                                goto err_qp;

                        ret = pvrdma_set_rq_size(to_vdev(pd->device),
                                                 &init_attr->cap, qp);
                        if (ret)
                                goto err_qp;

                        qp->npages = qp->npages_send + qp->npages_recv;

                        /* Skip header page. */
                        qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE;

                        /* Recv queue pages are after send pages. */
                        qp->rq.offset = qp->npages_send * PAGE_SIZE;
                }

                if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
                        dev_warn(&dev->pdev->dev,
                                 "overflow pages in queuepair\n");
                        ret = -EINVAL;
                        goto err_umem;
                }

                ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages,
                                           qp->is_kernel);
                if (ret) {
                        dev_warn(&dev->pdev->dev,
                                 "could not allocate page directory\n");
                        goto err_umem;
                }

                if (!qp->is_kernel) {
                        pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0);
                        if (!is_srq)
                                pvrdma_page_dir_insert_umem(&qp->pdir,
                                                            qp->rumem,
                                                            qp->npages_send);
                } else {
                        /* Ring state is always the first page. */
                        qp->sq.ring = qp->pdir.pages[0];
                        qp->rq.ring = is_srq ? NULL : &qp->sq.ring[1];
                }
                break;
        default:
                ret = -EINVAL;
                goto err_qp;
        }

        /* Not supported */
        init_attr->cap.max_inline_data = 0;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
        cmd->pd_handle = to_vpd(pd)->pd_handle;
        cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
        cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
        if (is_srq)
                cmd->srq_handle = to_vsrq(init_attr->srq)->srq_handle;
        else
                cmd->srq_handle = 0;
        cmd->max_send_wr = init_attr->cap.max_send_wr;
        cmd->max_recv_wr = init_attr->cap.max_recv_wr;
        cmd->max_send_sge = init_attr->cap.max_send_sge;
        cmd->max_recv_sge = init_attr->cap.max_recv_sge;
        cmd->max_inline_data = init_attr->cap.max_inline_data;
        cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
        cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
        cmd->is_srq = is_srq;
        cmd->lkey = 0;
        cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
        cmd->total_chunks = qp->npages;
        cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES;
        cmd->pdir_dma = qp->pdir.dir_dma;

        dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
                cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge,
                cmd->max_recv_sge);

        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not create queuepair, error: %d\n", ret);
                goto err_pdir;
        }

        /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
        qp->port = init_attr->port_num;

        if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) {
                qp->ibqp.qp_num = resp_v2->qpn;
                qp->qp_handle = resp_v2->qp_handle;
        } else {
                qp->ibqp.qp_num = resp->qpn;
                qp->qp_handle = resp->qpn;
        }

        spin_lock_irqsave(&dev->qp_tbl_lock, flags);
        dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
        spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

        if (udata) {
                qp_resp.qpn = qp->ibqp.qp_num;
                qp_resp.qp_handle = qp->qp_handle;

                if (ib_copy_to_udata(udata, &qp_resp,
                                     min(udata->outlen, sizeof(qp_resp)))) {
                        dev_warn(&dev->pdev->dev,
                                 "failed to copy back udata\n");
                        __pvrdma_destroy_qp(dev, qp);
                        return ERR_PTR(-EINVAL);
                }
        }

        return &qp->ibqp;

err_pdir:
        pvrdma_page_dir_cleanup(dev, &qp->pdir);
err_umem:
        ib_umem_release(qp->rumem);
        ib_umem_release(qp->sumem);
err_qp:
        kfree(qp);
        atomic_dec(&dev->num_qps);

        return ERR_PTR(ret);
}

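/*
 * Final QP teardown: remove the qp_tbl entry so the interrupt/completion
 * paths can no longer look the QP up, then drop the initial reference and
 * wait on qp->free so that anyone still holding a reference finishes before
 * the memory is released.
 */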
static void _pvrdma_free_qp(struct pvrdma_qp *qp)
{
        unsigned long flags;
        struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);

        spin_lock_irqsave(&dev->qp_tbl_lock, flags);
        dev->qp_tbl[qp->qp_handle] = NULL;
        spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

        if (refcount_dec_and_test(&qp->refcnt))
                complete(&qp->free);
        wait_for_completion(&qp->free);

        ib_umem_release(qp->rumem);
        ib_umem_release(qp->sumem);

        pvrdma_page_dir_cleanup(dev, &qp->pdir);

        kfree(qp);

        atomic_dec(&dev->num_qps);
}

static void pvrdma_free_qp(struct pvrdma_qp *qp)
{
        struct pvrdma_cq *scq;
        struct pvrdma_cq *rcq;
        unsigned long scq_flags, rcq_flags;

        /* In case cq is polling */
        get_cqs(qp, &scq, &rcq);
        pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        _pvrdma_flush_cqe(qp, scq);
        if (scq != rcq)
                _pvrdma_flush_cqe(qp, rcq);

        /*
         * We're now unlocking the CQs before clearing out the qp handle;
         * this should still be safe. We have destroyed the backend QP and
         * flushed the CQEs, so there should be no other completions for
         * this QP.
         */
        pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        _pvrdma_free_qp(qp);
}

static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev,
                                           u32 qp_handle)
{
        union pvrdma_cmd_req req;
        struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
        int ret;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
        cmd->qp_handle = qp_handle;

        ret = pvrdma_cmd_post(dev, &req, NULL, 0);
        if (ret < 0)
                dev_warn(&dev->pdev->dev,
                         "destroy queuepair failed, error: %d\n", ret);
}

/**
 * pvrdma_destroy_qp - destroy a queue pair
 * @qp: the queue pair to destroy
 * @udata: user data or null for kernel object
 *
 * @return: always 0.
 */
int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
        struct pvrdma_qp *vqp = to_vqp(qp);

        _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle);
        pvrdma_free_qp(vqp);

        return 0;
}

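/*
 * Variant of pvrdma_destroy_qp() used on the create path, before the QP has
 * been exposed through the IB core, when copying the response back to
 * userspace fails.
 */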
static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
                                struct pvrdma_qp *qp)
{
        _pvrdma_destroy_qp_work(dev, qp->qp_handle);
        _pvrdma_free_qp(qp);
}

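/*
 * pvrdma_modify_qp() is reached through the generic ib_modify_qp() verb. A
 * typical consumer walks the standard RESET -> INIT -> RTR -> RTS sequence;
 * a minimal sketch of the first step (the field values are illustrative
 * only, not mandated by this driver):
 *
 *      struct ib_qp_attr attr = {
 *              .qp_state        = IB_QPS_INIT,
 *              .pkey_index      = 0,
 *              .port_num        = 1,
 *              .qp_access_flags = IB_ACCESS_REMOTE_WRITE,
 *      };
 *      ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
 *                         IB_QP_PORT | IB_QP_ACCESS_FLAGS);
 *
 * ib_modify_qp_is_ok() below enforces that the supplied mask is legal for
 * the requested transition before the command is sent to the device.
 */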
/**
 * pvrdma_modify_qp - modify queue pair attributes
 * @ibqp: the queue pair
 * @attr: the new queue pair's attributes
 * @attr_mask: attributes mask
 * @udata: user data
 *
 * @returns 0 on success, otherwise returns an errno.
 */
int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                     int attr_mask, struct ib_udata *udata)
{
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        struct pvrdma_qp *qp = to_vqp(ibqp);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
        enum ib_qp_state cur_state, next_state;
        int ret;

        /* Sanity checking. The QP mutex protects the state checks and update. */
        mutex_lock(&qp->mutex);
        cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
                qp->state;
        next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;

        if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
                                attr_mask)) {
                ret = -EINVAL;
                goto out;
        }

        if (attr_mask & IB_QP_PORT) {
                if (attr->port_num == 0 ||
                    attr->port_num > ibqp->device->phys_port_cnt) {
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (attr_mask & IB_QP_MIN_RNR_TIMER) {
                if (attr->min_rnr_timer > 31) {
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (attr_mask & IB_QP_PKEY_INDEX) {
                if (attr->pkey_index >= dev->dsr->caps.max_pkeys) {
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (attr_mask & IB_QP_QKEY)
                qp->qkey = attr->qkey;

        if (cur_state == next_state && cur_state == IB_QPS_RESET) {
                ret = 0;
                goto out;
        }

        qp->state = next_state;
        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP;
        cmd->qp_handle = qp->qp_handle;
        cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
        cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state);
        cmd->attrs.cur_qp_state =
                ib_qp_state_to_pvrdma(attr->cur_qp_state);
        cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu);
        cmd->attrs.path_mig_state =
                ib_mig_state_to_pvrdma(attr->path_mig_state);
        cmd->attrs.qkey = attr->qkey;
        cmd->attrs.rq_psn = attr->rq_psn;
        cmd->attrs.sq_psn = attr->sq_psn;
        cmd->attrs.dest_qp_num = attr->dest_qp_num;
        cmd->attrs.qp_access_flags =
                ib_access_flags_to_pvrdma(attr->qp_access_flags);
        cmd->attrs.pkey_index = attr->pkey_index;
        cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
        cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
        cmd->attrs.sq_draining = attr->sq_draining;
        cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
        cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
        cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
        cmd->attrs.port_num = attr->port_num;
        cmd->attrs.timeout = attr->timeout;
        cmd->attrs.retry_cnt = attr->retry_cnt;
        cmd->attrs.rnr_retry = attr->rnr_retry;
        cmd->attrs.alt_port_num = attr->alt_port_num;
        cmd->attrs.alt_timeout = attr->alt_timeout;
        ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
        rdma_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
        rdma_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);

        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not modify queuepair, error: %d\n", ret);
        } else if (rsp.hdr.err > 0) {
                dev_warn(&dev->pdev->dev,
                         "cannot modify queuepair, error: %d\n", rsp.hdr.err);
                ret = -EINVAL;
        }

        if (ret == 0 && next_state == IB_QPS_RESET)
                pvrdma_reset_qp(qp);

out:
        mutex_unlock(&qp->mutex);

        return ret;
}

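/*
 * WQE slot lookup: slot @n of a queue lives at the queue's byte offset plus
 * n * wqe_size within the page directory; pvrdma_page_dir_get_ptr() resolves
 * that byte offset to a kernel pointer inside the right page.
 */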
static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n)
{
        return pvrdma_page_dir_get_ptr(&qp->pdir,
                                       qp->sq.offset + n * qp->sq.wqe_size);
}

static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
{
        return pvrdma_page_dir_get_ptr(&qp->pdir,
                                       qp->rq.offset + n * qp->rq.wqe_size);
}

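/*
 * Translate an IB_WR_REG_MR work request into the device's fast-register
 * WQE fields and hand the MR's page list to the device through the MR's own
 * page directory.
 */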
static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr,
                       const struct ib_reg_wr *wr)
{
        struct pvrdma_user_mr *mr = to_vmr(wr->mr);

        wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova;
        wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma;
        wqe_hdr->wr.fast_reg.page_shift = mr->page_shift;
        wqe_hdr->wr.fast_reg.page_list_len = mr->npages;
        wqe_hdr->wr.fast_reg.length = mr->ibmr.length;
        wqe_hdr->wr.fast_reg.access_flags = wr->access;
        wqe_hdr->wr.fast_reg.rkey = wr->key;

        return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages,
                                                mr->npages);
}

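/*
 * Posting protocol shared by the send and receive paths below: reserve a
 * slot with pvrdma_idx_ring_has_space(), write the WQE header and SGEs into
 * that slot, issue smp_wmb() so the WQE is visible before the producer index
 * moves, advance prod_tail with pvrdma_idx_ring_inc(), and finally ring the
 * UAR doorbell (PVRDMA_UAR_QP_SEND or PVRDMA_UAR_QP_RECV or'ed with the QP
 * handle) once per post call.
 */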
/**
 * pvrdma_post_send - post send work request entries on a QP
 * @ibqp: the QP
 * @wr: work request list to post
 * @bad_wr: the first bad WR returned
 *
 * @return: 0 on success, otherwise errno returned.
 */
int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                     const struct ib_send_wr **bad_wr)
{
        struct pvrdma_qp *qp = to_vqp(ibqp);
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        unsigned long flags;
        struct pvrdma_sq_wqe_hdr *wqe_hdr;
        struct pvrdma_sge *sge;
        int i, ret;

        /*
         * In states lower than RTS, we can fail immediately. In other states,
         * just post and let the device figure it out.
         */
        if (qp->state < IB_QPS_RTS) {
                *bad_wr = wr;
                return -EINVAL;
        }

        spin_lock_irqsave(&qp->sq.lock, flags);

        while (wr) {
                unsigned int tail = 0;

                if (unlikely(!pvrdma_idx_ring_has_space(
                                qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "send queue is full\n");
                        *bad_wr = wr;
                        ret = -ENOMEM;
                        goto out;
                }

                if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "send SGE overflow\n");
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                }

                if (unlikely(wr->opcode < 0)) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "invalid send opcode\n");
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                }

                /*
                 * Only support UD, RC.
                 * Need to check opcode table for thorough checking.
                 * opcode               _UD     _UC     _RC
                 * _SEND                x       x       x
                 * _SEND_WITH_IMM       x       x       x
                 * _RDMA_WRITE                  x       x
                 * _RDMA_WRITE_WITH_IMM         x       x
                 * _LOCAL_INV                   x       x
                 * _SEND_WITH_INV               x       x
                 * _RDMA_READ                           x
                 * _ATOMIC_CMP_AND_SWP                  x
                 * _ATOMIC_FETCH_AND_ADD                x
                 * _MASK_ATOMIC_CMP_AND_SWP             x
                 * _MASK_ATOMIC_FETCH_AND_ADD           x
                 * _REG_MR                              x
                 *
                 */
                if (qp->ibqp.qp_type != IB_QPT_UD &&
                    qp->ibqp.qp_type != IB_QPT_RC &&
                    wr->opcode != IB_WR_SEND) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "unsupported queuepair type\n");
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                } else if (qp->ibqp.qp_type == IB_QPT_UD ||
                           qp->ibqp.qp_type == IB_QPT_GSI) {
                        if (wr->opcode != IB_WR_SEND &&
                            wr->opcode != IB_WR_SEND_WITH_IMM) {
                                dev_warn_ratelimited(&dev->pdev->dev,
                                                     "invalid send opcode\n");
                                *bad_wr = wr;
                                ret = -EINVAL;
                                goto out;
                        }
                }

                wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail);
                memset(wqe_hdr, 0, sizeof(*wqe_hdr));
                wqe_hdr->wr_id = wr->wr_id;
                wqe_hdr->num_sge = wr->num_sge;
                wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode);
                wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags);
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
                        wqe_hdr->ex.imm_data = wr->ex.imm_data;

                if (unlikely(wqe_hdr->opcode == PVRDMA_WR_ERROR)) {
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                }

                switch (qp->ibqp.qp_type) {
                case IB_QPT_GSI:
                case IB_QPT_UD:
                        if (unlikely(!ud_wr(wr)->ah)) {
                                dev_warn_ratelimited(&dev->pdev->dev,
                                                     "invalid address handle\n");
                                *bad_wr = wr;
                                ret = -EINVAL;
                                goto out;
                        }

                        /*
                         * Use qkey from qp context if high order bit set,
                         * otherwise from work request.
                         */
                        wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn;
                        wqe_hdr->wr.ud.remote_qkey =
                                ud_wr(wr)->remote_qkey & 0x80000000 ?
                                qp->qkey : ud_wr(wr)->remote_qkey;
                        wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av;

                        break;
                case IB_QPT_RC:
                        switch (wr->opcode) {
                        case IB_WR_RDMA_READ:
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
                                wqe_hdr->wr.rdma.remote_addr =
                                        rdma_wr(wr)->remote_addr;
                                wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey;
                                break;
                        case IB_WR_LOCAL_INV:
                        case IB_WR_SEND_WITH_INV:
                                wqe_hdr->ex.invalidate_rkey =
                                        wr->ex.invalidate_rkey;
                                break;
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
                                wqe_hdr->wr.atomic.remote_addr =
                                        atomic_wr(wr)->remote_addr;
                                wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey;
                                wqe_hdr->wr.atomic.compare_add =
                                        atomic_wr(wr)->compare_add;
                                if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP)
                                        wqe_hdr->wr.atomic.swap =
                                                atomic_wr(wr)->swap;
                                break;
                        case IB_WR_REG_MR:
                                ret = set_reg_seg(wqe_hdr, reg_wr(wr));
                                if (ret < 0) {
                                        dev_warn_ratelimited(&dev->pdev->dev,
                                                             "Failed to set fast register work request\n");
                                        *bad_wr = wr;
                                        goto out;
                                }
                                break;
                        default:
                                break;
                        }

                        break;
                default:
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "invalid queuepair type\n");
                        ret = -EINVAL;
                        *bad_wr = wr;
                        goto out;
                }

                sge = (struct pvrdma_sge *)(wqe_hdr + 1);
                for (i = 0; i < wr->num_sge; i++) {
                        /* Need to check wqe_size 0 or max size */
                        sge->addr = wr->sg_list[i].addr;
                        sge->length = wr->sg_list[i].length;
                        sge->lkey = wr->sg_list[i].lkey;
                        sge++;
                }

                /* Make sure wqe is written before index update */
                smp_wmb();

                /* Update shared sq ring */
                pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
                                    qp->sq.wqe_cnt);

                wr = wr->next;
        }

        ret = 0;

out:
        spin_unlock_irqrestore(&qp->sq.lock, flags);

        if (!ret)
                pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle);

        return ret;
}

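/*
 * Receive posting only applies to QPs that own their receive queue; QPs
 * attached to an SRQ are rejected below and receive buffers must be posted
 * to the SRQ itself.
 */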
/**
 * pvrdma_post_receive - post receive work request entries on a QP
 * @ibqp: the QP
 * @wr: the work request list to post
 * @bad_wr: the first bad WR returned
 *
 * @return: 0 on success, otherwise errno returned.
 */
int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
                     const struct ib_recv_wr **bad_wr)
{
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        unsigned long flags;
        struct pvrdma_qp *qp = to_vqp(ibqp);
        struct pvrdma_rq_wqe_hdr *wqe_hdr;
        struct pvrdma_sge *sge;
        int ret = 0;
        int i;

        /*
         * In the RESET state, we can fail immediately. For other states,
         * just post and let the device figure it out.
         */
        if (qp->state == IB_QPS_RESET) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (qp->srq) {
                dev_warn(&dev->pdev->dev, "QP associated with SRQ\n");
                *bad_wr = wr;
                return -EINVAL;
        }

        spin_lock_irqsave(&qp->rq.lock, flags);

        while (wr) {
                unsigned int tail = 0;

                if (unlikely(wr->num_sge > qp->rq.max_sg ||
                             wr->num_sge < 0)) {
                        ret = -EINVAL;
                        *bad_wr = wr;
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "recv SGE overflow\n");
                        goto out;
                }

                if (unlikely(!pvrdma_idx_ring_has_space(
                                qp->rq.ring, qp->rq.wqe_cnt, &tail))) {
                        ret = -ENOMEM;
                        *bad_wr = wr;
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "recv queue full\n");
                        goto out;
                }

                wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail);
                wqe_hdr->wr_id = wr->wr_id;
                wqe_hdr->num_sge = wr->num_sge;
                wqe_hdr->total_len = 0;

                sge = (struct pvrdma_sge *)(wqe_hdr + 1);
                for (i = 0; i < wr->num_sge; i++) {
                        sge->addr = wr->sg_list[i].addr;
                        sge->length = wr->sg_list[i].length;
                        sge->lkey = wr->sg_list[i].lkey;
                        sge++;
                }

                /* Make sure wqe is written before index update */
                smp_wmb();

                /* Update shared rq ring */
                pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
                                    qp->rq.wqe_cnt);

                wr = wr->next;
        }

        spin_unlock_irqrestore(&qp->rq.lock, flags);

        pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle);

        return ret;

out:
        spin_unlock_irqrestore(&qp->rq.lock, flags);

        return ret;
}

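/*
 * A QP known to be in RESET is reported directly from the cached software
 * state without a device command; otherwise the attributes are fetched from
 * the device and qp->state is refreshed from the response.
 */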
/**
 * pvrdma_query_qp - query a queue pair's attributes
 * @ibqp: the queue pair to query
 * @attr: the queue pair's attributes
 * @attr_mask: attributes mask
 * @init_attr: initial queue pair attributes
 *
 * @returns 0 on success, otherwise returns an errno.
 */
int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                    int attr_mask, struct ib_qp_init_attr *init_attr)
{
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        struct pvrdma_qp *qp = to_vqp(ibqp);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_query_qp *cmd = &req.query_qp;
        struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp;
        int ret = 0;

        mutex_lock(&qp->mutex);

        if (qp->state == IB_QPS_RESET) {
                attr->qp_state = IB_QPS_RESET;
                goto out;
        }

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP;
        cmd->qp_handle = qp->qp_handle;
        cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);

        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not query queuepair, error: %d\n", ret);
                goto out;
        }

        attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state);
        attr->cur_qp_state =
                pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state);
        attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu);
        attr->path_mig_state =
                pvrdma_mig_state_to_ib(resp->attrs.path_mig_state);
        attr->qkey = resp->attrs.qkey;
        attr->rq_psn = resp->attrs.rq_psn;
        attr->sq_psn = resp->attrs.sq_psn;
        attr->dest_qp_num = resp->attrs.dest_qp_num;
        attr->qp_access_flags =
                pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags);
        attr->pkey_index = resp->attrs.pkey_index;
        attr->alt_pkey_index = resp->attrs.alt_pkey_index;
        attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify;
        attr->sq_draining = resp->attrs.sq_draining;
        attr->max_rd_atomic = resp->attrs.max_rd_atomic;
        attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic;
        attr->min_rnr_timer = resp->attrs.min_rnr_timer;
        attr->port_num = resp->attrs.port_num;
        attr->timeout = resp->attrs.timeout;
        attr->retry_cnt = resp->attrs.retry_cnt;
        attr->rnr_retry = resp->attrs.rnr_retry;
        attr->alt_port_num = resp->attrs.alt_port_num;
        attr->alt_timeout = resp->attrs.alt_timeout;
        pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
        pvrdma_ah_attr_to_rdma(&attr->ah_attr, &resp->attrs.ah_attr);
        pvrdma_ah_attr_to_rdma(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);

        qp->state = attr->qp_state;

        ret = 0;

out:
        attr->cur_qp_state = attr->qp_state;

        init_attr->event_handler = qp->ibqp.event_handler;
        init_attr->qp_context = qp->ibqp.qp_context;
        init_attr->send_cq = qp->ibqp.send_cq;
        init_attr->recv_cq = qp->ibqp.recv_cq;
        init_attr->srq = qp->ibqp.srq;
        init_attr->xrcd = NULL;
        init_attr->cap = attr->cap;
        init_attr->sq_sig_type = 0;
        init_attr->qp_type = qp->ibqp.qp_type;
        init_attr->create_flags = 0;
        init_attr->port_num = qp->port;

        mutex_unlock(&qp->mutex);
        return ret;
}