linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
   1/*
   2 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of EITHER the GNU General Public License
   6 * version 2 as published by the Free Software Foundation or the BSD
   7 * 2-Clause License. This program is distributed in the hope that it
   8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
   9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
  10 * See the GNU General Public License version 2 for more details at
  11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program available in the file COPYING in the main
  15 * directory of this source tree.
  16 *
  17 * The BSD 2-Clause License
  18 *
  19 *     Redistribution and use in source and binary forms, with or
  20 *     without modification, are permitted provided that the following
  21 *     conditions are met:
  22 *
  23 *      - Redistributions of source code must retain the above
  24 *        copyright notice, this list of conditions and the following
  25 *        disclaimer.
  26 *
  27 *      - Redistributions in binary form must reproduce the above
  28 *        copyright notice, this list of conditions and the following
  29 *        disclaimer in the documentation and/or other materials
  30 *        provided with the distribution.
  31 *
  32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  43 * OF THE POSSIBILITY OF SUCH DAMAGE.
  44 */
  45
  46#include <asm/page.h>
  47#include <linux/io.h>
  48#include <linux/wait.h>
  49#include <rdma/ib_addr.h>
  50#include <rdma/ib_smi.h>
  51#include <rdma/ib_user_verbs.h>
  52
  53#include "pvrdma.h"
  54
  55static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
  56                                struct pvrdma_qp *qp);
  57
  58static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
  59                           struct pvrdma_cq **recv_cq)
  60{
  61        *send_cq = to_vcq(qp->ibqp.send_cq);
  62        *recv_cq = to_vcq(qp->ibqp.recv_cq);
  63}
  64
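/*
 * When the send and recv CQs are distinct they are always locked in
 * increasing cq_handle order, so concurrent reset/destroy paths cannot
 * deadlock on the pair; a shared CQ is only locked once.
 */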
  65static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
  66                            unsigned long *scq_flags,
  67                            unsigned long *rcq_flags)
  68        __acquires(scq->cq_lock) __acquires(rcq->cq_lock)
  69{
  70        if (scq == rcq) {
  71                spin_lock_irqsave(&scq->cq_lock, *scq_flags);
  72                __acquire(rcq->cq_lock);
  73        } else if (scq->cq_handle < rcq->cq_handle) {
  74                spin_lock_irqsave(&scq->cq_lock, *scq_flags);
  75                spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags,
  76                                         SINGLE_DEPTH_NESTING);
  77        } else {
  78                spin_lock_irqsave(&rcq->cq_lock, *rcq_flags);
  79                spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags,
  80                                         SINGLE_DEPTH_NESTING);
  81        }
  82}
  83
  84static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
  85                              unsigned long *scq_flags,
  86                              unsigned long *rcq_flags)
  87        __releases(scq->cq_lock) __releases(rcq->cq_lock)
  88{
  89        if (scq == rcq) {
  90                __release(rcq->cq_lock);
  91                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
  92        } else if (scq->cq_handle < rcq->cq_handle) {
  93                spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
  94                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
  95        } else {
  96                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
  97                spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
  98        }
  99}
 100
 101static void pvrdma_reset_qp(struct pvrdma_qp *qp)
 102{
 103        struct pvrdma_cq *scq, *rcq;
 104        unsigned long scq_flags, rcq_flags;
 105
 106        /* Clean up cqes */
 107        get_cqs(qp, &scq, &rcq);
 108        pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
 109
 110        _pvrdma_flush_cqe(qp, scq);
 111        if (scq != rcq)
 112                _pvrdma_flush_cqe(qp, rcq);
 113
 114        pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
 115
 116        /*
  117         * Reset queuepair. The ring-state checks below are needed because
  118         * userspace queuepairs do not have kernel ring state.
 119         */
 120        if (qp->rq.ring) {
 121                atomic_set(&qp->rq.ring->cons_head, 0);
 122                atomic_set(&qp->rq.ring->prod_tail, 0);
 123        }
 124        if (qp->sq.ring) {
 125                atomic_set(&qp->sq.ring->cons_head, 0);
 126                atomic_set(&qp->sq.ring->prod_tail, 0);
 127        }
 128}
 129
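/*
 * Worked sizing example (illustrative only, assuming the usual 16-byte
 * pvrdma_rq_wqe_hdr and 16-byte pvrdma_sge): max_recv_wr = 100 and
 * max_recv_sge = 3 round up to wqe_cnt = 128 and max_sg = 4, so
 * wqe_size = roundup_pow_of_two(16 + 4 * 16) = 128 bytes and, with 4K
 * pages, npages_recv = (128 * 128 + 4095) / 4096 = 4. The send queue in
 * pvrdma_set_sq_size() is sized with the same scheme, plus
 * PVRDMA_QP_NUM_HEADER_PAGES extra pages reserved at the front.
 */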
 130static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
 131                              struct ib_qp_cap *req_cap,
 132                              struct pvrdma_qp *qp)
 133{
 134        if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr ||
 135            req_cap->max_recv_sge > dev->dsr->caps.max_sge) {
 136                dev_warn(&dev->pdev->dev, "recv queue size invalid\n");
 137                return -EINVAL;
 138        }
 139
 140        qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr));
 141        qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge));
 142
 143        /* Write back */
 144        req_cap->max_recv_wr = qp->rq.wqe_cnt;
 145        req_cap->max_recv_sge = qp->rq.max_sg;
 146
 147        qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) +
 148                                             sizeof(struct pvrdma_sge) *
 149                                             qp->rq.max_sg);
 150        qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) /
 151                          PAGE_SIZE;
 152
 153        return 0;
 154}
 155
 156static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
 157                              struct pvrdma_qp *qp)
 158{
 159        if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
 160            req_cap->max_send_sge > dev->dsr->caps.max_sge) {
 161                dev_warn(&dev->pdev->dev, "send queue size invalid\n");
 162                return -EINVAL;
 163        }
 164
 165        qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr));
 166        qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge));
 167
 168        /* Write back */
 169        req_cap->max_send_wr = qp->sq.wqe_cnt;
 170        req_cap->max_send_sge = qp->sq.max_sg;
 171
 172        qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) +
 173                                             sizeof(struct pvrdma_sge) *
 174                                             qp->sq.max_sg);
  175        /* Note: PVRDMA_QP_NUM_HEADER_PAGES extra pages for the header. */
 176        qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES +
 177                          (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) /
 178                                                                PAGE_SIZE;
 179
 180        return 0;
 181}
 182
 183/**
 184 * pvrdma_create_qp - create queue pair
 185 * @pd: protection domain
 186 * @init_attr: queue pair attributes
 187 * @udata: user data
 188 *
 189 * @return: the ib_qp pointer on success, otherwise returns an errno.
 190 */
 191struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 192                               struct ib_qp_init_attr *init_attr,
 193                               struct ib_udata *udata)
 194{
 195        struct pvrdma_qp *qp = NULL;
 196        struct pvrdma_dev *dev = to_vdev(pd->device);
 197        union pvrdma_cmd_req req;
 198        union pvrdma_cmd_resp rsp;
 199        struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
 200        struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
 201        struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2;
 202        struct pvrdma_create_qp ucmd;
 203        struct pvrdma_create_qp_resp qp_resp = {};
 204        unsigned long flags;
 205        int ret;
 206        bool is_srq = !!init_attr->srq;
 207
 208        if (init_attr->create_flags) {
 209                dev_warn(&dev->pdev->dev,
 210                         "invalid create queuepair flags %#x\n",
 211                         init_attr->create_flags);
 212                return ERR_PTR(-EOPNOTSUPP);
 213        }
 214
 215        if (init_attr->qp_type != IB_QPT_RC &&
 216            init_attr->qp_type != IB_QPT_UD &&
 217            init_attr->qp_type != IB_QPT_GSI) {
 218                dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
 219                         init_attr->qp_type);
 220                return ERR_PTR(-EOPNOTSUPP);
 221        }
 222
 223        if (is_srq && !dev->dsr->caps.max_srq) {
 224                dev_warn(&dev->pdev->dev,
 225                         "SRQs not supported by device\n");
 226                return ERR_PTR(-EINVAL);
 227        }
 228
 229        if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
 230                return ERR_PTR(-ENOMEM);
 231
 232        switch (init_attr->qp_type) {
 233        case IB_QPT_GSI:
 234                if (init_attr->port_num == 0 ||
 235                    init_attr->port_num > pd->device->phys_port_cnt) {
 236                        dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
 237                        ret = -EINVAL;
 238                        goto err_qp;
 239                }
 240                fallthrough;
 241        case IB_QPT_RC:
 242        case IB_QPT_UD:
 243                qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 244                if (!qp) {
 245                        ret = -ENOMEM;
 246                        goto err_qp;
 247                }
 248
 249                spin_lock_init(&qp->sq.lock);
 250                spin_lock_init(&qp->rq.lock);
 251                mutex_init(&qp->mutex);
 252                refcount_set(&qp->refcnt, 1);
 253                init_completion(&qp->free);
 254
 255                qp->state = IB_QPS_RESET;
 256                qp->is_kernel = !udata;
 257
 258                if (!qp->is_kernel) {
 259                        dev_dbg(&dev->pdev->dev,
 260                                "create queuepair from user space\n");
 261
 262                        if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
 263                                ret = -EFAULT;
 264                                goto err_qp;
 265                        }
 266
  267                        /* Does userspace support returning the qpn and qp handle? */
 268                        if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION &&
 269                            udata->outlen < sizeof(qp_resp)) {
 270                                dev_warn(&dev->pdev->dev,
 271                                         "create queuepair not supported\n");
 272                                ret = -EOPNOTSUPP;
 273                                goto err_qp;
 274                        }
 275
 276                        if (!is_srq) {
  277                                /* Map the userspace recv queue buffer. */
 278                                qp->rumem =
 279                                        ib_umem_get(pd->device, ucmd.rbuf_addr,
 280                                                    ucmd.rbuf_size, 0);
 281                                if (IS_ERR(qp->rumem)) {
 282                                        ret = PTR_ERR(qp->rumem);
 283                                        goto err_qp;
 284                                }
 285                                qp->srq = NULL;
 286                        } else {
 287                                qp->rumem = NULL;
 288                                qp->srq = to_vsrq(init_attr->srq);
 289                        }
 290
 291                        qp->sumem = ib_umem_get(pd->device, ucmd.sbuf_addr,
 292                                                ucmd.sbuf_size, 0);
 293                        if (IS_ERR(qp->sumem)) {
 294                                if (!is_srq)
 295                                        ib_umem_release(qp->rumem);
 296                                ret = PTR_ERR(qp->sumem);
 297                                goto err_qp;
 298                        }
 299
 300                        qp->npages_send =
 301                                ib_umem_num_dma_blocks(qp->sumem, PAGE_SIZE);
 302                        if (!is_srq)
 303                                qp->npages_recv = ib_umem_num_dma_blocks(
 304                                        qp->rumem, PAGE_SIZE);
 305                        else
 306                                qp->npages_recv = 0;
 307                        qp->npages = qp->npages_send + qp->npages_recv;
 308                } else {
 309                        ret = pvrdma_set_sq_size(to_vdev(pd->device),
 310                                                 &init_attr->cap, qp);
 311                        if (ret)
 312                                goto err_qp;
 313
 314                        ret = pvrdma_set_rq_size(to_vdev(pd->device),
 315                                                 &init_attr->cap, qp);
 316                        if (ret)
 317                                goto err_qp;
 318
 319                        qp->npages = qp->npages_send + qp->npages_recv;
 320
 321                        /* Skip header page. */
 322                        qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE;
 323
 324                        /* Recv queue pages are after send pages. */
 325                        qp->rq.offset = qp->npages_send * PAGE_SIZE;
 326                }
 327
 328                if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
 329                        dev_warn(&dev->pdev->dev,
 330                                 "overflow pages in queuepair\n");
 331                        ret = -EINVAL;
 332                        goto err_umem;
 333                }
 334
 335                ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages,
 336                                           qp->is_kernel);
 337                if (ret) {
 338                        dev_warn(&dev->pdev->dev,
 339                                 "could not allocate page directory\n");
 340                        goto err_umem;
 341                }
 342
 343                if (!qp->is_kernel) {
 344                        pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0);
 345                        if (!is_srq)
 346                                pvrdma_page_dir_insert_umem(&qp->pdir,
 347                                                            qp->rumem,
 348                                                            qp->npages_send);
 349                } else {
 350                        /* Ring state is always the first page. */
 351                        qp->sq.ring = qp->pdir.pages[0];
 352                        qp->rq.ring = is_srq ? NULL : &qp->sq.ring[1];
 353                }
 354                break;
 355        default:
 356                ret = -EINVAL;
 357                goto err_qp;
 358        }
 359
 360        /* Not supported */
 361        init_attr->cap.max_inline_data = 0;
 362
 363        memset(cmd, 0, sizeof(*cmd));
 364        cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
 365        cmd->pd_handle = to_vpd(pd)->pd_handle;
 366        cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
 367        cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
 368        if (is_srq)
 369                cmd->srq_handle = to_vsrq(init_attr->srq)->srq_handle;
 370        else
 371                cmd->srq_handle = 0;
 372        cmd->max_send_wr = init_attr->cap.max_send_wr;
 373        cmd->max_recv_wr = init_attr->cap.max_recv_wr;
 374        cmd->max_send_sge = init_attr->cap.max_send_sge;
 375        cmd->max_recv_sge = init_attr->cap.max_recv_sge;
 376        cmd->max_inline_data = init_attr->cap.max_inline_data;
 377        cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
 378        cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
 379        cmd->is_srq = is_srq;
 380        cmd->lkey = 0;
 381        cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
 382        cmd->total_chunks = qp->npages;
 383        cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES;
 384        cmd->pdir_dma = qp->pdir.dir_dma;
 385
 386        dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
 387                cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge,
 388                cmd->max_recv_sge);
 389
 390        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP);
 391        if (ret < 0) {
 392                dev_warn(&dev->pdev->dev,
 393                         "could not create queuepair, error: %d\n", ret);
 394                goto err_pdir;
 395        }
 396
 397        /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
 398        qp->port = init_attr->port_num;
 399
 400        if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) {
 401                qp->ibqp.qp_num = resp_v2->qpn;
 402                qp->qp_handle = resp_v2->qp_handle;
 403        } else {
 404                qp->ibqp.qp_num = resp->qpn;
 405                qp->qp_handle = resp->qpn;
 406        }
 407
 408        spin_lock_irqsave(&dev->qp_tbl_lock, flags);
 409        dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
 410        spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
 411
 412        if (udata) {
 413                qp_resp.qpn = qp->ibqp.qp_num;
 414                qp_resp.qp_handle = qp->qp_handle;
 415
 416                if (ib_copy_to_udata(udata, &qp_resp,
 417                                     min(udata->outlen, sizeof(qp_resp)))) {
 418                        dev_warn(&dev->pdev->dev,
 419                                 "failed to copy back udata\n");
 420                        __pvrdma_destroy_qp(dev, qp);
 421                        return ERR_PTR(-EINVAL);
 422                }
 423        }
 424
 425        return &qp->ibqp;
 426
 427err_pdir:
 428        pvrdma_page_dir_cleanup(dev, &qp->pdir);
 429err_umem:
 430        ib_umem_release(qp->rumem);
 431        ib_umem_release(qp->sumem);
 432err_qp:
 433        kfree(qp);
 434        atomic_dec(&dev->num_qps);
 435
 436        return ERR_PTR(ret);
 437}
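/*
 * Illustrative consumer-side sketch (not part of this driver): a kernel
 * ULP reaches pvrdma_create_qp() through the core verbs layer, roughly as
 * below, assuming an existing pd, send_cq and recv_cq.
 *
 *	struct ib_qp_init_attr attr = {
 *		.qp_type	= IB_QPT_RC,
 *		.send_cq	= send_cq,
 *		.recv_cq	= recv_cq,
 *		.sq_sig_type	= IB_SIGNAL_REQ_WR,
 *		.cap = {
 *			.max_send_wr	= 64,
 *			.max_recv_wr	= 64,
 *			.max_send_sge	= 2,
 *			.max_recv_sge	= 2,
 *		},
 *	};
 *	struct ib_qp *qp = ib_create_qp(pd, &attr);
 *
 *	if (IS_ERR(qp))
 *		return PTR_ERR(qp);
 */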
 438
 439static void _pvrdma_free_qp(struct pvrdma_qp *qp)
 440{
 441        unsigned long flags;
 442        struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);
 443
 444        spin_lock_irqsave(&dev->qp_tbl_lock, flags);
 445        dev->qp_tbl[qp->qp_handle] = NULL;
 446        spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
 447
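        /*
         * Drop the initial reference and wait for any remaining users, e.g.
         * the completion/event interrupt path that looked the QP up in
         * qp_tbl, to release theirs before the memory is freed.
         */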
 448        if (refcount_dec_and_test(&qp->refcnt))
 449                complete(&qp->free);
 450        wait_for_completion(&qp->free);
 451
 452        ib_umem_release(qp->rumem);
 453        ib_umem_release(qp->sumem);
 454
 455        pvrdma_page_dir_cleanup(dev, &qp->pdir);
 456
 457        kfree(qp);
 458
 459        atomic_dec(&dev->num_qps);
 460}
 461
 462static void pvrdma_free_qp(struct pvrdma_qp *qp)
 463{
 464        struct pvrdma_cq *scq;
 465        struct pvrdma_cq *rcq;
 466        unsigned long scq_flags, rcq_flags;
 467
 468        /* In case cq is polling */
 469        get_cqs(qp, &scq, &rcq);
 470        pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
 471
 472        _pvrdma_flush_cqe(qp, scq);
 473        if (scq != rcq)
 474                _pvrdma_flush_cqe(qp, rcq);
 475
 476        /*
  477         * We're now unlocking the CQs before clearing out the qp handle;
  478         * this should still be safe. We have destroyed the backend QP and
  479         * flushed the CQEs, so there should be no other completions for this QP.
 480         */
 481        pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
 482
 483        _pvrdma_free_qp(qp);
 484}
 485
 486static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev,
 487                                           u32 qp_handle)
 488{
 489        union pvrdma_cmd_req req;
 490        struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
 491        int ret;
 492
 493        memset(cmd, 0, sizeof(*cmd));
 494        cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
 495        cmd->qp_handle = qp_handle;
 496
 497        ret = pvrdma_cmd_post(dev, &req, NULL, 0);
 498        if (ret < 0)
 499                dev_warn(&dev->pdev->dev,
 500                         "destroy queuepair failed, error: %d\n", ret);
 501}
 502
 503/**
 504 * pvrdma_destroy_qp - destroy a queue pair
 505 * @qp: the queue pair to destroy
 506 * @udata: user data or null for kernel object
 507 *
 508 * @return: always 0.
 509 */
 510int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
 511{
 512        struct pvrdma_qp *vqp = to_vqp(qp);
 513
 514        _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle);
 515        pvrdma_free_qp(vqp);
 516
 517        return 0;
 518}
 519
 520static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
 521                                struct pvrdma_qp *qp)
 522{
 523        _pvrdma_destroy_qp_work(dev, qp->qp_handle);
 524        _pvrdma_free_qp(qp);
 525}
 526
 527/**
 528 * pvrdma_modify_qp - modify queue pair attributes
 529 * @ibqp: the queue pair
 530 * @attr: the new queue pair's attributes
 531 * @attr_mask: attributes mask
 532 * @udata: user data
 533 *
 534 * @returns 0 on success, otherwise returns an errno.
 535 */
 536int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 537                     int attr_mask, struct ib_udata *udata)
 538{
 539        struct pvrdma_dev *dev = to_vdev(ibqp->device);
 540        struct pvrdma_qp *qp = to_vqp(ibqp);
 541        union pvrdma_cmd_req req;
 542        union pvrdma_cmd_resp rsp;
 543        struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
 544        enum ib_qp_state cur_state, next_state;
 545        int ret;
 546
 547        if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
 548                return -EOPNOTSUPP;
 549
  550        /* Sanity checking; the checks below must be done under the QP mutex. */
 551        mutex_lock(&qp->mutex);
 552        cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
 553                qp->state;
 554        next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
 555
 556        if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
 557                                attr_mask)) {
 558                ret = -EINVAL;
 559                goto out;
 560        }
 561
 562        if (attr_mask & IB_QP_PORT) {
 563                if (attr->port_num == 0 ||
 564                    attr->port_num > ibqp->device->phys_port_cnt) {
 565                        ret = -EINVAL;
 566                        goto out;
 567                }
 568        }
 569
 570        if (attr_mask & IB_QP_MIN_RNR_TIMER) {
 571                if (attr->min_rnr_timer > 31) {
 572                        ret = -EINVAL;
 573                        goto out;
 574                }
 575        }
 576
 577        if (attr_mask & IB_QP_PKEY_INDEX) {
 578                if (attr->pkey_index >= dev->dsr->caps.max_pkeys) {
 579                        ret = -EINVAL;
 580                        goto out;
 581                }
 582        }
 583
 584        if (attr_mask & IB_QP_QKEY)
 585                qp->qkey = attr->qkey;
 586
 587        if (cur_state == next_state && cur_state == IB_QPS_RESET) {
 588                ret = 0;
 589                goto out;
 590        }
 591
 592        qp->state = next_state;
 593        memset(cmd, 0, sizeof(*cmd));
 594        cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP;
 595        cmd->qp_handle = qp->qp_handle;
 596        cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
 597        cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state);
 598        cmd->attrs.cur_qp_state =
 599                ib_qp_state_to_pvrdma(attr->cur_qp_state);
 600        cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu);
 601        cmd->attrs.path_mig_state =
 602                ib_mig_state_to_pvrdma(attr->path_mig_state);
 603        cmd->attrs.qkey = attr->qkey;
 604        cmd->attrs.rq_psn = attr->rq_psn;
 605        cmd->attrs.sq_psn = attr->sq_psn;
 606        cmd->attrs.dest_qp_num = attr->dest_qp_num;
 607        cmd->attrs.qp_access_flags =
 608                ib_access_flags_to_pvrdma(attr->qp_access_flags);
 609        cmd->attrs.pkey_index = attr->pkey_index;
 610        cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
 611        cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
 612        cmd->attrs.sq_draining = attr->sq_draining;
 613        cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
 614        cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
 615        cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
 616        cmd->attrs.port_num = attr->port_num;
 617        cmd->attrs.timeout = attr->timeout;
 618        cmd->attrs.retry_cnt = attr->retry_cnt;
 619        cmd->attrs.rnr_retry = attr->rnr_retry;
 620        cmd->attrs.alt_port_num = attr->alt_port_num;
 621        cmd->attrs.alt_timeout = attr->alt_timeout;
 622        ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
 623        rdma_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
 624        rdma_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);
 625
 626        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
 627        if (ret < 0) {
 628                dev_warn(&dev->pdev->dev,
 629                         "could not modify queuepair, error: %d\n", ret);
 630        } else if (rsp.hdr.err > 0) {
 631                dev_warn(&dev->pdev->dev,
 632                         "cannot modify queuepair, error: %d\n", rsp.hdr.err);
 633                ret = -EINVAL;
 634        }
 635
 636        if (ret == 0 && next_state == IB_QPS_RESET)
 637                pvrdma_reset_qp(qp);
 638
 639out:
 640        mutex_unlock(&qp->mutex);
 641
 642        return ret;
 643}
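/*
 * Illustrative note: consumers normally walk a QP through
 * RESET -> INIT -> RTR -> RTS with successive ib_modify_qp() calls, each
 * supplying the attr_mask bits required for that transition; the
 * ib_modify_qp_is_ok() check above enforces the legal combinations.
 */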
 644
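/*
 * WQE slot n lives at byte offset sq/rq.offset + n * wqe_size inside the
 * QP's page directory; pvrdma_page_dir_get_ptr() maps that offset onto the
 * backing page.
 */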
 645static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n)
 646{
 647        return pvrdma_page_dir_get_ptr(&qp->pdir,
 648                                       qp->sq.offset + n * qp->sq.wqe_size);
 649}
 650
 651static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
 652{
 653        return pvrdma_page_dir_get_ptr(&qp->pdir,
 654                                       qp->rq.offset + n * qp->rq.wqe_size);
 655}
 656
 657static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr,
 658                       const struct ib_reg_wr *wr)
 659{
 660        struct pvrdma_user_mr *mr = to_vmr(wr->mr);
 661
 662        wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova;
 663        wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma;
 664        wqe_hdr->wr.fast_reg.page_shift = mr->page_shift;
 665        wqe_hdr->wr.fast_reg.page_list_len = mr->npages;
 666        wqe_hdr->wr.fast_reg.length = mr->ibmr.length;
 667        wqe_hdr->wr.fast_reg.access_flags = wr->access;
 668        wqe_hdr->wr.fast_reg.rkey = wr->key;
 669
 670        return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages,
 671                                                mr->npages);
 672}
 673
 674/**
 675 * pvrdma_post_send - post send work request entries on a QP
 676 * @ibqp: the QP
 677 * @wr: work request list to post
 678 * @bad_wr: the first bad WR returned
 679 *
 680 * @return: 0 on success, otherwise errno returned.
 681 */
 682int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 683                     const struct ib_send_wr **bad_wr)
 684{
 685        struct pvrdma_qp *qp = to_vqp(ibqp);
 686        struct pvrdma_dev *dev = to_vdev(ibqp->device);
 687        unsigned long flags;
 688        struct pvrdma_sq_wqe_hdr *wqe_hdr;
 689        struct pvrdma_sge *sge;
 690        int i, ret;
 691
 692        /*
 693         * In states lower than RTS, we can fail immediately. In other states,
 694         * just post and let the device figure it out.
 695         */
 696        if (qp->state < IB_QPS_RTS) {
 697                *bad_wr = wr;
 698                return -EINVAL;
 699        }
 700
 701        spin_lock_irqsave(&qp->sq.lock, flags);
 702
 703        while (wr) {
 704                unsigned int tail = 0;
 705
 706                if (unlikely(!pvrdma_idx_ring_has_space(
 707                                qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
 708                        dev_warn_ratelimited(&dev->pdev->dev,
 709                                             "send queue is full\n");
 710                        *bad_wr = wr;
 711                        ret = -ENOMEM;
 712                        goto out;
 713                }
 714
 715                if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) {
 716                        dev_warn_ratelimited(&dev->pdev->dev,
 717                                             "send SGE overflow\n");
 718                        *bad_wr = wr;
 719                        ret = -EINVAL;
 720                        goto out;
 721                }
 722
 723                if (unlikely(wr->opcode < 0)) {
 724                        dev_warn_ratelimited(&dev->pdev->dev,
 725                                             "invalid send opcode\n");
 726                        *bad_wr = wr;
 727                        ret = -EINVAL;
 728                        goto out;
 729                }
 730
 731                /*
  732                 * Only UD and RC are supported here; a thorough check would
  733                 * consult the opcode table below.
 734                 * opcode               _UD     _UC     _RC
 735                 * _SEND                x       x       x
 736                 * _SEND_WITH_IMM       x       x       x
 737                 * _RDMA_WRITE                  x       x
 738                 * _RDMA_WRITE_WITH_IMM         x       x
 739                 * _LOCAL_INV                   x       x
 740                 * _SEND_WITH_INV               x       x
 741                 * _RDMA_READ                           x
 742                 * _ATOMIC_CMP_AND_SWP                  x
 743                 * _ATOMIC_FETCH_AND_ADD                x
 744                 * _MASK_ATOMIC_CMP_AND_SWP             x
 745                 * _MASK_ATOMIC_FETCH_AND_ADD           x
 746                 * _REG_MR                              x
 747                 *
 748                 */
 749                if (qp->ibqp.qp_type != IB_QPT_UD &&
 750                    qp->ibqp.qp_type != IB_QPT_RC &&
 751                        wr->opcode != IB_WR_SEND) {
 752                        dev_warn_ratelimited(&dev->pdev->dev,
 753                                             "unsupported queuepair type\n");
 754                        *bad_wr = wr;
 755                        ret = -EINVAL;
 756                        goto out;
 757                } else if (qp->ibqp.qp_type == IB_QPT_UD ||
 758                           qp->ibqp.qp_type == IB_QPT_GSI) {
 759                        if (wr->opcode != IB_WR_SEND &&
 760                            wr->opcode != IB_WR_SEND_WITH_IMM) {
 761                                dev_warn_ratelimited(&dev->pdev->dev,
 762                                                     "invalid send opcode\n");
 763                                *bad_wr = wr;
 764                                ret = -EINVAL;
 765                                goto out;
 766                        }
 767                }
 768
 769                wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail);
 770                memset(wqe_hdr, 0, sizeof(*wqe_hdr));
 771                wqe_hdr->wr_id = wr->wr_id;
 772                wqe_hdr->num_sge = wr->num_sge;
 773                wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode);
 774                wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags);
 775                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
 776                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
 777                        wqe_hdr->ex.imm_data = wr->ex.imm_data;
 778
 779                if (unlikely(wqe_hdr->opcode == PVRDMA_WR_ERROR)) {
 780                        *bad_wr = wr;
 781                        ret = -EINVAL;
 782                        goto out;
 783                }
 784
 785                switch (qp->ibqp.qp_type) {
 786                case IB_QPT_GSI:
 787                case IB_QPT_UD:
 788                        if (unlikely(!ud_wr(wr)->ah)) {
 789                                dev_warn_ratelimited(&dev->pdev->dev,
 790                                                     "invalid address handle\n");
 791                                *bad_wr = wr;
 792                                ret = -EINVAL;
 793                                goto out;
 794                        }
 795
 796                        /*
 797                         * Use qkey from qp context if high order bit set,
 798                         * otherwise from work request.
 799                         */
 800                        wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn;
 801                        wqe_hdr->wr.ud.remote_qkey =
 802                                ud_wr(wr)->remote_qkey & 0x80000000 ?
 803                                qp->qkey : ud_wr(wr)->remote_qkey;
 804                        wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av;
 805
 806                        break;
 807                case IB_QPT_RC:
 808                        switch (wr->opcode) {
 809                        case IB_WR_RDMA_READ:
 810                        case IB_WR_RDMA_WRITE:
 811                        case IB_WR_RDMA_WRITE_WITH_IMM:
 812                                wqe_hdr->wr.rdma.remote_addr =
 813                                        rdma_wr(wr)->remote_addr;
 814                                wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey;
 815                                break;
 816                        case IB_WR_LOCAL_INV:
 817                        case IB_WR_SEND_WITH_INV:
 818                                wqe_hdr->ex.invalidate_rkey =
 819                                        wr->ex.invalidate_rkey;
 820                                break;
 821                        case IB_WR_ATOMIC_CMP_AND_SWP:
 822                        case IB_WR_ATOMIC_FETCH_AND_ADD:
 823                                wqe_hdr->wr.atomic.remote_addr =
 824                                        atomic_wr(wr)->remote_addr;
 825                                wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey;
 826                                wqe_hdr->wr.atomic.compare_add =
 827                                        atomic_wr(wr)->compare_add;
 828                                if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP)
 829                                        wqe_hdr->wr.atomic.swap =
 830                                                atomic_wr(wr)->swap;
 831                                break;
 832                        case IB_WR_REG_MR:
 833                                ret = set_reg_seg(wqe_hdr, reg_wr(wr));
 834                                if (ret < 0) {
 835                                        dev_warn_ratelimited(&dev->pdev->dev,
 836                                                             "Failed to set fast register work request\n");
 837                                        *bad_wr = wr;
 838                                        goto out;
 839                                }
 840                                break;
 841                        default:
 842                                break;
 843                        }
 844
 845                        break;
 846                default:
 847                        dev_warn_ratelimited(&dev->pdev->dev,
 848                                             "invalid queuepair type\n");
 849                        ret = -EINVAL;
 850                        *bad_wr = wr;
 851                        goto out;
 852                }
 853
 854                sge = (struct pvrdma_sge *)(wqe_hdr + 1);
 855                for (i = 0; i < wr->num_sge; i++) {
 856                        /* Need to check wqe_size 0 or max size */
 857                        sge->addr = wr->sg_list[i].addr;
 858                        sge->length = wr->sg_list[i].length;
 859                        sge->lkey = wr->sg_list[i].lkey;
 860                        sge++;
 861                }
 862
 863                /* Make sure wqe is written before index update */
 864                smp_wmb();
 865
 866                /* Update shared sq ring */
 867                pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
 868                                    qp->sq.wqe_cnt);
 869
 870                wr = wr->next;
 871        }
 872
 873        ret = 0;
 874
 875out:
 876        spin_unlock_irqrestore(&qp->sq.lock, flags);
 877
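        /*
         * All WQEs and the producer index are visible at this point; the UAR
         * write below is the doorbell that tells the device to process the
         * send queue of this QP.
         */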
 878        if (!ret)
 879                pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle);
 880
 881        return ret;
 882}
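/*
 * Illustrative caller-side sketch (not part of this driver), assuming a
 * connected RC qp and a buffer already mapped for DMA and registered:
 *
 *	struct ib_sge sge = {
 *		.addr	= dma_addr,
 *		.length	= len,
 *		.lkey	= mr->lkey,
 *	};
 *	struct ib_send_wr wr = {
 *		.wr_id		= 1,
 *		.opcode		= IB_WR_SEND,
 *		.send_flags	= IB_SEND_SIGNALED,
 *		.sg_list	= &sge,
 *		.num_sge	= 1,
 *	};
 *	const struct ib_send_wr *bad_wr;
 *	int rc = ib_post_send(qp, &wr, &bad_wr);
 */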
 883
 884/**
 885 * pvrdma_post_recv - post receive work request entries on a QP
 886 * @ibqp: the QP
 887 * @wr: the work request list to post
 888 * @bad_wr: the first bad WR returned
 889 *
 890 * @return: 0 on success, otherwise errno returned.
 891 */
 892int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 893                     const struct ib_recv_wr **bad_wr)
 894{
 895        struct pvrdma_dev *dev = to_vdev(ibqp->device);
 896        unsigned long flags;
 897        struct pvrdma_qp *qp = to_vqp(ibqp);
 898        struct pvrdma_rq_wqe_hdr *wqe_hdr;
 899        struct pvrdma_sge *sge;
 900        int ret = 0;
 901        int i;
 902
 903        /*
 904         * In the RESET state, we can fail immediately. For other states,
 905         * just post and let the device figure it out.
 906         */
 907        if (qp->state == IB_QPS_RESET) {
 908                *bad_wr = wr;
 909                return -EINVAL;
 910        }
 911
 912        if (qp->srq) {
 913                dev_warn(&dev->pdev->dev, "QP associated with SRQ\n");
 914                *bad_wr = wr;
 915                return -EINVAL;
 916        }
 917
 918        spin_lock_irqsave(&qp->rq.lock, flags);
 919
 920        while (wr) {
 921                unsigned int tail = 0;
 922
 923                if (unlikely(wr->num_sge > qp->rq.max_sg ||
 924                             wr->num_sge < 0)) {
 925                        ret = -EINVAL;
 926                        *bad_wr = wr;
 927                        dev_warn_ratelimited(&dev->pdev->dev,
 928                                             "recv SGE overflow\n");
 929                        goto out;
 930                }
 931
 932                if (unlikely(!pvrdma_idx_ring_has_space(
 933                                qp->rq.ring, qp->rq.wqe_cnt, &tail))) {
 934                        ret = -ENOMEM;
 935                        *bad_wr = wr;
 936                        dev_warn_ratelimited(&dev->pdev->dev,
 937                                             "recv queue full\n");
 938                        goto out;
 939                }
 940
 941                wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail);
 942                wqe_hdr->wr_id = wr->wr_id;
 943                wqe_hdr->num_sge = wr->num_sge;
 944                wqe_hdr->total_len = 0;
 945
 946                sge = (struct pvrdma_sge *)(wqe_hdr + 1);
 947                for (i = 0; i < wr->num_sge; i++) {
 948                        sge->addr = wr->sg_list[i].addr;
 949                        sge->length = wr->sg_list[i].length;
 950                        sge->lkey = wr->sg_list[i].lkey;
 951                        sge++;
 952                }
 953
 954                /* Make sure wqe is written before index update */
 955                smp_wmb();
 956
 957                /* Update shared rq ring */
 958                pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
 959                                    qp->rq.wqe_cnt);
 960
 961                wr = wr->next;
 962        }
 963
 964        spin_unlock_irqrestore(&qp->rq.lock, flags);
 965
 966        pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle);
 967
 968        return ret;
 969
 970out:
 971        spin_unlock_irqrestore(&qp->rq.lock, flags);
 972
 973        return ret;
 974}
 975
 976/**
 977 * pvrdma_query_qp - query a queue pair's attributes
 978 * @ibqp: the queue pair to query
 979 * @attr: the queue pair's attributes
 980 * @attr_mask: attributes mask
 981 * @init_attr: initial queue pair attributes
 982 *
 983 * @returns 0 on success, otherwise returns an errno.
 984 */
 985int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 986                    int attr_mask, struct ib_qp_init_attr *init_attr)
 987{
 988        struct pvrdma_dev *dev = to_vdev(ibqp->device);
 989        struct pvrdma_qp *qp = to_vqp(ibqp);
 990        union pvrdma_cmd_req req;
 991        union pvrdma_cmd_resp rsp;
 992        struct pvrdma_cmd_query_qp *cmd = &req.query_qp;
 993        struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp;
 994        int ret = 0;
 995
 996        mutex_lock(&qp->mutex);
 997
 998        if (qp->state == IB_QPS_RESET) {
 999                attr->qp_state = IB_QPS_RESET;
1000                goto out;
1001        }
1002
1003        memset(cmd, 0, sizeof(*cmd));
1004        cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP;
1005        cmd->qp_handle = qp->qp_handle;
1006        cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
1007
1008        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP);
1009        if (ret < 0) {
1010                dev_warn(&dev->pdev->dev,
1011                         "could not query queuepair, error: %d\n", ret);
1012                goto out;
1013        }
1014
1015        attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state);
1016        attr->cur_qp_state =
1017                pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state);
1018        attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu);
1019        attr->path_mig_state =
1020                pvrdma_mig_state_to_ib(resp->attrs.path_mig_state);
1021        attr->qkey = resp->attrs.qkey;
1022        attr->rq_psn = resp->attrs.rq_psn;
1023        attr->sq_psn = resp->attrs.sq_psn;
1024        attr->dest_qp_num = resp->attrs.dest_qp_num;
1025        attr->qp_access_flags =
1026                pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags);
1027        attr->pkey_index = resp->attrs.pkey_index;
1028        attr->alt_pkey_index = resp->attrs.alt_pkey_index;
1029        attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify;
1030        attr->sq_draining = resp->attrs.sq_draining;
1031        attr->max_rd_atomic = resp->attrs.max_rd_atomic;
1032        attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic;
1033        attr->min_rnr_timer = resp->attrs.min_rnr_timer;
1034        attr->port_num = resp->attrs.port_num;
1035        attr->timeout = resp->attrs.timeout;
1036        attr->retry_cnt = resp->attrs.retry_cnt;
1037        attr->rnr_retry = resp->attrs.rnr_retry;
1038        attr->alt_port_num = resp->attrs.alt_port_num;
1039        attr->alt_timeout = resp->attrs.alt_timeout;
1040        pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
1041        pvrdma_ah_attr_to_rdma(&attr->ah_attr, &resp->attrs.ah_attr);
1042        pvrdma_ah_attr_to_rdma(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);
1043
1044        qp->state = attr->qp_state;
1045
1046        ret = 0;
1047
1048out:
1049        attr->cur_qp_state = attr->qp_state;
1050
1051        init_attr->event_handler = qp->ibqp.event_handler;
1052        init_attr->qp_context = qp->ibqp.qp_context;
1053        init_attr->send_cq = qp->ibqp.send_cq;
1054        init_attr->recv_cq = qp->ibqp.recv_cq;
1055        init_attr->srq = qp->ibqp.srq;
1056        init_attr->xrcd = NULL;
1057        init_attr->cap = attr->cap;
1058        init_attr->sq_sig_type = 0;
1059        init_attr->qp_type = qp->ibqp.qp_type;
1060        init_attr->create_flags = 0;
1061        init_attr->port_num = qp->port;
1062
1063        mutex_unlock(&qp->mutex);
1064        return ret;
1065}
1066