linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>

#include "pvrdma.h"

static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
                                struct pvrdma_qp *qp);

static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
                           struct pvrdma_cq **recv_cq)
{
        *send_cq = to_vcq(qp->ibqp.send_cq);
        *recv_cq = to_vcq(qp->ibqp.recv_cq);
}

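/*
 * Lock a QP's send and receive CQs together. When the two CQs differ, the
 * locks are always taken in ascending cq_handle order so concurrent callers
 * cannot deadlock; when they are the same CQ, only one lock is taken.
 */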
static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
                            unsigned long *scq_flags,
                            unsigned long *rcq_flags)
        __acquires(scq->cq_lock) __acquires(rcq->cq_lock)
{
        if (scq == rcq) {
                spin_lock_irqsave(&scq->cq_lock, *scq_flags);
                __acquire(rcq->cq_lock);
        } else if (scq->cq_handle < rcq->cq_handle) {
                spin_lock_irqsave(&scq->cq_lock, *scq_flags);
                spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags,
                                         SINGLE_DEPTH_NESTING);
        } else {
                spin_lock_irqsave(&rcq->cq_lock, *rcq_flags);
                spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags,
                                         SINGLE_DEPTH_NESTING);
        }
}

static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
                              unsigned long *scq_flags,
                              unsigned long *rcq_flags)
        __releases(scq->cq_lock) __releases(rcq->cq_lock)
{
        if (scq == rcq) {
                __release(rcq->cq_lock);
                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
        } else if (scq->cq_handle < rcq->cq_handle) {
                spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
        } else {
                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
                spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
        }
}

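/*
 * Flush any completion entries this QP still owns in its CQs and, for
 * kernel-space QPs, rewind the shared ring indices back to empty.
 */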
static void pvrdma_reset_qp(struct pvrdma_qp *qp)
{
        struct pvrdma_cq *scq, *rcq;
        unsigned long scq_flags, rcq_flags;

        /* Clean up cqes */
        get_cqs(qp, &scq, &rcq);
        pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        _pvrdma_flush_cqe(qp, scq);
        if (scq != rcq)
                _pvrdma_flush_cqe(qp, rcq);

        pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        /*
         * Reset queuepair. The checks are because usermode queuepairs won't
         * have kernel ringstates.
         */
        if (qp->rq.ring) {
                atomic_set(&qp->rq.ring->cons_head, 0);
                atomic_set(&qp->rq.ring->prod_tail, 0);
        }
        if (qp->sq.ring) {
                atomic_set(&qp->sq.ring->cons_head, 0);
                atomic_set(&qp->sq.ring->prod_tail, 0);
        }
}

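/*
 * The two helpers below validate the requested queue sizes against the
 * device capabilities, round the WR and SGE counts up to powers of two,
 * write the actual values back into the caller's ib_qp_cap, and compute
 * how many pages each queue needs (the send queue additionally reserves
 * header pages up front).
 */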
static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
                              struct ib_qp_cap *req_cap,
                              struct pvrdma_qp *qp)
{
        if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr ||
            req_cap->max_recv_sge > dev->dsr->caps.max_sge) {
                dev_warn(&dev->pdev->dev, "recv queue size invalid\n");
                return -EINVAL;
        }

        qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr));
        qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge));

        /* Write back */
        req_cap->max_recv_wr = qp->rq.wqe_cnt;
        req_cap->max_recv_sge = qp->rq.max_sg;

        qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) +
                                             sizeof(struct pvrdma_sge) *
                                             qp->rq.max_sg);
        qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) /
                          PAGE_SIZE;

        return 0;
}

static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
                              struct pvrdma_qp *qp)
{
        if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
            req_cap->max_send_sge > dev->dsr->caps.max_sge) {
                dev_warn(&dev->pdev->dev, "send queue size invalid\n");
                return -EINVAL;
        }

        qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr));
        qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge));

        /* Write back */
        req_cap->max_send_wr = qp->sq.wqe_cnt;
        req_cap->max_send_sge = qp->sq.max_sg;

        qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) +
                                             sizeof(struct pvrdma_sge) *
                                             qp->sq.max_sg);
        /* Note: one extra page for the header. */
        qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES +
                          (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) /
                                                                PAGE_SIZE;

        return 0;
}

/**
 * pvrdma_create_qp - create queue pair
 * @ibqp: queue pair
 * @init_attr: queue pair attributes
 * @udata: user data
 *
 * @return: 0 on success, otherwise returns an errno.
 */
int pvrdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
                     struct ib_udata *udata)
{
        struct pvrdma_qp *qp = to_vqp(ibqp);
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
        struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
        struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2;
        struct pvrdma_create_qp ucmd;
        struct pvrdma_create_qp_resp qp_resp = {};
        unsigned long flags;
        int ret;
        bool is_srq = !!init_attr->srq;

        if (init_attr->create_flags) {
                dev_warn(&dev->pdev->dev,
                         "invalid create queuepair flags %#x\n",
                         init_attr->create_flags);
                return -EOPNOTSUPP;
        }

        if (init_attr->qp_type != IB_QPT_RC &&
            init_attr->qp_type != IB_QPT_UD &&
            init_attr->qp_type != IB_QPT_GSI) {
                dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
                         init_attr->qp_type);
                return -EOPNOTSUPP;
        }

        if (is_srq && !dev->dsr->caps.max_srq) {
                dev_warn(&dev->pdev->dev,
                         "SRQs not supported by device\n");
                return -EINVAL;
        }

        if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
                return -ENOMEM;

        switch (init_attr->qp_type) {
        case IB_QPT_GSI:
                if (init_attr->port_num == 0 ||
                    init_attr->port_num > ibqp->device->phys_port_cnt) {
                        dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
                        ret = -EINVAL;
                        goto err_qp;
                }
                fallthrough;
        case IB_QPT_RC:
        case IB_QPT_UD:
                spin_lock_init(&qp->sq.lock);
                spin_lock_init(&qp->rq.lock);
                mutex_init(&qp->mutex);
                refcount_set(&qp->refcnt, 1);
                init_completion(&qp->free);

                qp->state = IB_QPS_RESET;
                qp->is_kernel = !udata;

                if (!qp->is_kernel) {
                        dev_dbg(&dev->pdev->dev,
                                "create queuepair from user space\n");

                        if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
                                ret = -EFAULT;
                                goto err_qp;
                        }

                        /* Userspace supports qpn and qp handles? */
                        if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION &&
                            udata->outlen < sizeof(qp_resp)) {
                                dev_warn(&dev->pdev->dev,
                                         "create queuepair not supported\n");
                                ret = -EOPNOTSUPP;
                                goto err_qp;
                        }

                        if (!is_srq) {
                                /* Map the userspace recv queue buffer. */
                                qp->rumem = ib_umem_get(ibqp->device,
                                                        ucmd.rbuf_addr,
                                                        ucmd.rbuf_size, 0);
                                if (IS_ERR(qp->rumem)) {
                                        ret = PTR_ERR(qp->rumem);
                                        goto err_qp;
                                }
                                qp->srq = NULL;
                        } else {
                                qp->rumem = NULL;
                                qp->srq = to_vsrq(init_attr->srq);
                        }

                        qp->sumem = ib_umem_get(ibqp->device, ucmd.sbuf_addr,
                                                ucmd.sbuf_size, 0);
                        if (IS_ERR(qp->sumem)) {
                                if (!is_srq)
                                        ib_umem_release(qp->rumem);
                                ret = PTR_ERR(qp->sumem);
                                goto err_qp;
                        }

                        qp->npages_send =
                                ib_umem_num_dma_blocks(qp->sumem, PAGE_SIZE);
                        if (!is_srq)
                                qp->npages_recv = ib_umem_num_dma_blocks(
                                        qp->rumem, PAGE_SIZE);
                        else
                                qp->npages_recv = 0;
                        qp->npages = qp->npages_send + qp->npages_recv;
                } else {
                        ret = pvrdma_set_sq_size(to_vdev(ibqp->device),
                                                 &init_attr->cap, qp);
                        if (ret)
                                goto err_qp;

                        ret = pvrdma_set_rq_size(to_vdev(ibqp->device),
                                                 &init_attr->cap, qp);
                        if (ret)
                                goto err_qp;

                        qp->npages = qp->npages_send + qp->npages_recv;

                        /* Skip header page. */
                        qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE;

                        /* Recv queue pages are after send pages. */
                        qp->rq.offset = qp->npages_send * PAGE_SIZE;
                }

                if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
                        dev_warn(&dev->pdev->dev,
                                 "overflow pages in queuepair\n");
                        ret = -EINVAL;
                        goto err_umem;
                }

                ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages,
                                           qp->is_kernel);
                if (ret) {
                        dev_warn(&dev->pdev->dev,
                                 "could not allocate page directory\n");
                        goto err_umem;
                }

                if (!qp->is_kernel) {
                        pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0);
                        if (!is_srq)
                                pvrdma_page_dir_insert_umem(&qp->pdir,
                                                            qp->rumem,
                                                            qp->npages_send);
                } else {
                        /* Ring state is always the first page. */
                        qp->sq.ring = qp->pdir.pages[0];
                        qp->rq.ring = is_srq ? NULL : &qp->sq.ring[1];
                }
                break;
        default:
                ret = -EINVAL;
                goto err_qp;
        }

        /* Not supported */
        init_attr->cap.max_inline_data = 0;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
        cmd->pd_handle = to_vpd(ibqp->pd)->pd_handle;
        cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
        cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
        if (is_srq)
                cmd->srq_handle = to_vsrq(init_attr->srq)->srq_handle;
        else
                cmd->srq_handle = 0;
        cmd->max_send_wr = init_attr->cap.max_send_wr;
        cmd->max_recv_wr = init_attr->cap.max_recv_wr;
        cmd->max_send_sge = init_attr->cap.max_send_sge;
        cmd->max_recv_sge = init_attr->cap.max_recv_sge;
        cmd->max_inline_data = init_attr->cap.max_inline_data;
        cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
        cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
        cmd->is_srq = is_srq;
        cmd->lkey = 0;
        cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
        cmd->total_chunks = qp->npages;
        cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES;
        cmd->pdir_dma = qp->pdir.dir_dma;

        dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
                cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge,
                cmd->max_recv_sge);

        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not create queuepair, error: %d\n", ret);
                goto err_pdir;
        }

        /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
        qp->port = init_attr->port_num;

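        /*
         * Newer devices (PVRDMA_QPHANDLE_VERSION and later) report the
         * guest-visible QP number and the device QP handle separately;
         * older devices use the QP number as the handle as well.
         */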
        if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) {
                qp->ibqp.qp_num = resp_v2->qpn;
                qp->qp_handle = resp_v2->qp_handle;
        } else {
                qp->ibqp.qp_num = resp->qpn;
                qp->qp_handle = resp->qpn;
        }

        spin_lock_irqsave(&dev->qp_tbl_lock, flags);
        dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
        spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

        if (udata) {
                qp_resp.qpn = qp->ibqp.qp_num;
                qp_resp.qp_handle = qp->qp_handle;

                if (ib_copy_to_udata(udata, &qp_resp,
                                     min(udata->outlen, sizeof(qp_resp)))) {
                        dev_warn(&dev->pdev->dev,
                                 "failed to copy back udata\n");
                        __pvrdma_destroy_qp(dev, qp);
                        return -EINVAL;
                }
        }

        return 0;

err_pdir:
        pvrdma_page_dir_cleanup(dev, &qp->pdir);
err_umem:
        ib_umem_release(qp->rumem);
        ib_umem_release(qp->sumem);
err_qp:
        atomic_dec(&dev->num_qps);
        return ret;
}

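/*
 * Tear down the driver-side state of a QP: unhook it from the handle table,
 * wait for all outstanding references to drop, then release the user memory
 * regions and the page directory.
 */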
static void _pvrdma_free_qp(struct pvrdma_qp *qp)
{
        unsigned long flags;
        struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);

        spin_lock_irqsave(&dev->qp_tbl_lock, flags);
        dev->qp_tbl[qp->qp_handle] = NULL;
        spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

        if (refcount_dec_and_test(&qp->refcnt))
                complete(&qp->free);
        wait_for_completion(&qp->free);

        ib_umem_release(qp->rumem);
        ib_umem_release(qp->sumem);

        pvrdma_page_dir_cleanup(dev, &qp->pdir);

        atomic_dec(&dev->num_qps);
}

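/*
 * Flush any completions still owned by this QP while holding both CQ locks,
 * then release the QP's resources.
 */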
static void pvrdma_free_qp(struct pvrdma_qp *qp)
{
        struct pvrdma_cq *scq;
        struct pvrdma_cq *rcq;
        unsigned long scq_flags, rcq_flags;

        /* In case cq is polling */
        get_cqs(qp, &scq, &rcq);
        pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        _pvrdma_flush_cqe(qp, scq);
        if (scq != rcq)
                _pvrdma_flush_cqe(qp, rcq);

        /*
         * We're now unlocking the CQs before clearing out the qp handle.
         * This should still be safe: we have destroyed the backend QP and
         * flushed the CQEs, so there should be no other completions for
         * this QP.
         */
        pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        _pvrdma_free_qp(qp);
}

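/*
 * Post a DESTROY_QP command to the device. A failure is only logged; the
 * driver-side teardown proceeds regardless.
 */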
static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev,
                                           u32 qp_handle)
{
        union pvrdma_cmd_req req;
        struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
        int ret;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
        cmd->qp_handle = qp_handle;

        ret = pvrdma_cmd_post(dev, &req, NULL, 0);
        if (ret < 0)
                dev_warn(&dev->pdev->dev,
                         "destroy queuepair failed, error: %d\n", ret);
}

/**
 * pvrdma_destroy_qp - destroy a queue pair
 * @qp: the queue pair to destroy
 * @udata: user data or null for kernel object
 *
 * @return: always 0.
 */
int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
        struct pvrdma_qp *vqp = to_vqp(qp);

        _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle);
        pvrdma_free_qp(vqp);

        return 0;
}

static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
                                struct pvrdma_qp *qp)
{
        _pvrdma_destroy_qp_work(dev, qp->qp_handle);
        _pvrdma_free_qp(qp);
}

/**
 * pvrdma_modify_qp - modify queue pair attributes
 * @ibqp: the queue pair
 * @attr: the new queue pair's attributes
 * @attr_mask: attributes mask
 * @udata: user data
 *
 * @return: 0 on success, otherwise returns an errno.
 */
int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                     int attr_mask, struct ib_udata *udata)
{
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        struct pvrdma_qp *qp = to_vqp(ibqp);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
        enum ib_qp_state cur_state, next_state;
        int ret;

        if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
                return -EOPNOTSUPP;

        /* Sanity checking; serialize attribute changes with the QP mutex. */
        mutex_lock(&qp->mutex);
        cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
                qp->state;
        next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;

        if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
                                attr_mask)) {
                ret = -EINVAL;
                goto out;
        }

        if (attr_mask & IB_QP_PORT) {
                if (attr->port_num == 0 ||
                    attr->port_num > ibqp->device->phys_port_cnt) {
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (attr_mask & IB_QP_MIN_RNR_TIMER) {
                if (attr->min_rnr_timer > 31) {
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (attr_mask & IB_QP_PKEY_INDEX) {
                if (attr->pkey_index >= dev->dsr->caps.max_pkeys) {
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (attr_mask & IB_QP_QKEY)
                qp->qkey = attr->qkey;

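        /* A RESET -> RESET transition is a no-op; skip the device command. */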
        if (cur_state == next_state && cur_state == IB_QPS_RESET) {
                ret = 0;
                goto out;
        }

        qp->state = next_state;
        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP;
        cmd->qp_handle = qp->qp_handle;
        cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
        cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state);
        cmd->attrs.cur_qp_state =
                ib_qp_state_to_pvrdma(attr->cur_qp_state);
        cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu);
        cmd->attrs.path_mig_state =
                ib_mig_state_to_pvrdma(attr->path_mig_state);
        cmd->attrs.qkey = attr->qkey;
        cmd->attrs.rq_psn = attr->rq_psn;
        cmd->attrs.sq_psn = attr->sq_psn;
        cmd->attrs.dest_qp_num = attr->dest_qp_num;
        cmd->attrs.qp_access_flags =
                ib_access_flags_to_pvrdma(attr->qp_access_flags);
        cmd->attrs.pkey_index = attr->pkey_index;
        cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
        cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
        cmd->attrs.sq_draining = attr->sq_draining;
        cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
        cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
        cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
        cmd->attrs.port_num = attr->port_num;
        cmd->attrs.timeout = attr->timeout;
        cmd->attrs.retry_cnt = attr->retry_cnt;
        cmd->attrs.rnr_retry = attr->rnr_retry;
        cmd->attrs.alt_port_num = attr->alt_port_num;
        cmd->attrs.alt_timeout = attr->alt_timeout;
        ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
        rdma_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
        rdma_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);

        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not modify queuepair, error: %d\n", ret);
        } else if (rsp.hdr.err > 0) {
                dev_warn(&dev->pdev->dev,
                         "cannot modify queuepair, error: %d\n", rsp.hdr.err);
                ret = -EINVAL;
        }

        if (ret == 0 && next_state == IB_QPS_RESET)
                pvrdma_reset_qp(qp);

out:
        mutex_unlock(&qp->mutex);

        return ret;
}

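/*
 * WQE slots live in the QP's page directory; locate slot n by its byte
 * offset from the start of the corresponding queue.
 */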
static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n)
{
        return pvrdma_page_dir_get_ptr(&qp->pdir,
                                       qp->sq.offset + n * qp->sq.wqe_size);
}

static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
{
        return pvrdma_page_dir_get_ptr(&qp->pdir,
                                       qp->rq.offset + n * qp->rq.wqe_size);
}

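/*
 * Translate an ib_reg_wr into the device's fast-register WQE segment and
 * write the MR's page list into its page directory.
 */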
static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr,
                       const struct ib_reg_wr *wr)
{
        struct pvrdma_user_mr *mr = to_vmr(wr->mr);

        wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova;
        wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma;
        wqe_hdr->wr.fast_reg.page_shift = mr->page_shift;
        wqe_hdr->wr.fast_reg.page_list_len = mr->npages;
        wqe_hdr->wr.fast_reg.length = mr->ibmr.length;
        wqe_hdr->wr.fast_reg.access_flags = wr->access;
        wqe_hdr->wr.fast_reg.rkey = wr->key;

        return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages,
                                                mr->npages);
}

/**
 * pvrdma_post_send - post send work request entries on a QP
 * @ibqp: the QP
 * @wr: work request list to post
 * @bad_wr: the first bad WR returned
 *
 * @return: 0 on success, otherwise returns an errno.
 */
int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                     const struct ib_send_wr **bad_wr)
{
        struct pvrdma_qp *qp = to_vqp(ibqp);
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        unsigned long flags;
        struct pvrdma_sq_wqe_hdr *wqe_hdr;
        struct pvrdma_sge *sge;
        int i, ret;

        /*
         * In states lower than RTS, we can fail immediately. In other states,
         * just post and let the device figure it out.
         */
        if (qp->state < IB_QPS_RTS) {
                *bad_wr = wr;
                return -EINVAL;
        }

        spin_lock_irqsave(&qp->sq.lock, flags);

        while (wr) {
                unsigned int tail = 0;

                if (unlikely(!pvrdma_idx_ring_has_space(
                                qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "send queue is full\n");
                        *bad_wr = wr;
                        ret = -ENOMEM;
                        goto out;
                }

                if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "send SGE overflow\n");
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                }

                if (unlikely(wr->opcode < 0)) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "invalid send opcode\n");
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                }

                /*
                 * Only support UD, RC.
                 * Need to check opcode table for thorough checking.
                 * opcode               _UD     _UC     _RC
                 * _SEND                x       x       x
                 * _SEND_WITH_IMM       x       x       x
                 * _RDMA_WRITE                  x       x
                 * _RDMA_WRITE_WITH_IMM         x       x
                 * _LOCAL_INV                   x       x
                 * _SEND_WITH_INV               x       x
                 * _RDMA_READ                           x
                 * _ATOMIC_CMP_AND_SWP                  x
                 * _ATOMIC_FETCH_AND_ADD                x
                 * _MASK_ATOMIC_CMP_AND_SWP             x
                 * _MASK_ATOMIC_FETCH_AND_ADD           x
                 * _REG_MR                              x
                 *
                 */
                if (qp->ibqp.qp_type != IB_QPT_UD &&
                    qp->ibqp.qp_type != IB_QPT_RC &&
                    wr->opcode != IB_WR_SEND) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "unsupported queuepair type\n");
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                } else if (qp->ibqp.qp_type == IB_QPT_UD ||
                           qp->ibqp.qp_type == IB_QPT_GSI) {
                        if (wr->opcode != IB_WR_SEND &&
                            wr->opcode != IB_WR_SEND_WITH_IMM) {
                                dev_warn_ratelimited(&dev->pdev->dev,
                                                     "invalid send opcode\n");
                                *bad_wr = wr;
                                ret = -EINVAL;
                                goto out;
                        }
                }

                wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail);
                memset(wqe_hdr, 0, sizeof(*wqe_hdr));
                wqe_hdr->wr_id = wr->wr_id;
                wqe_hdr->num_sge = wr->num_sge;
                wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode);
                wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags);
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
                        wqe_hdr->ex.imm_data = wr->ex.imm_data;

                if (unlikely(wqe_hdr->opcode == PVRDMA_WR_ERROR)) {
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                }

                switch (qp->ibqp.qp_type) {
                case IB_QPT_GSI:
                case IB_QPT_UD:
                        if (unlikely(!ud_wr(wr)->ah)) {
                                dev_warn_ratelimited(&dev->pdev->dev,
                                                     "invalid address handle\n");
                                *bad_wr = wr;
                                ret = -EINVAL;
                                goto out;
                        }

                        /*
                         * Use qkey from qp context if high order bit set,
                         * otherwise from work request.
                         */
                        wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn;
                        wqe_hdr->wr.ud.remote_qkey =
                                ud_wr(wr)->remote_qkey & 0x80000000 ?
                                qp->qkey : ud_wr(wr)->remote_qkey;
                        wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av;

                        break;
                case IB_QPT_RC:
                        switch (wr->opcode) {
                        case IB_WR_RDMA_READ:
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
                                wqe_hdr->wr.rdma.remote_addr =
                                        rdma_wr(wr)->remote_addr;
                                wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey;
                                break;
                        case IB_WR_LOCAL_INV:
                        case IB_WR_SEND_WITH_INV:
                                wqe_hdr->ex.invalidate_rkey =
                                        wr->ex.invalidate_rkey;
                                break;
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
                                wqe_hdr->wr.atomic.remote_addr =
                                        atomic_wr(wr)->remote_addr;
                                wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey;
                                wqe_hdr->wr.atomic.compare_add =
                                        atomic_wr(wr)->compare_add;
                                if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP)
                                        wqe_hdr->wr.atomic.swap =
                                                atomic_wr(wr)->swap;
                                break;
                        case IB_WR_REG_MR:
                                ret = set_reg_seg(wqe_hdr, reg_wr(wr));
                                if (ret < 0) {
                                        dev_warn_ratelimited(&dev->pdev->dev,
                                                             "Failed to set fast register work request\n");
                                        *bad_wr = wr;
                                        goto out;
                                }
                                break;
                        default:
                                break;
                        }

                        break;
                default:
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "invalid queuepair type\n");
                        ret = -EINVAL;
                        *bad_wr = wr;
                        goto out;
                }

                sge = (struct pvrdma_sge *)(wqe_hdr + 1);
                for (i = 0; i < wr->num_sge; i++) {
                        /* Need to check wqe_size 0 or max size */
                        sge->addr = wr->sg_list[i].addr;
                        sge->length = wr->sg_list[i].length;
                        sge->lkey = wr->sg_list[i].lkey;
                        sge++;
                }

                /* Make sure wqe is written before index update */
                smp_wmb();

                /* Update shared sq ring */
                pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
                                    qp->sq.wqe_cnt);

                wr = wr->next;
        }

        ret = 0;

out:
        spin_unlock_irqrestore(&qp->sq.lock, flags);

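        /*
         * Ring the doorbell only after dropping the SQ lock, and only if
         * every WR in the list was queued successfully.
         */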
        if (!ret)
                pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle);

        return ret;
}

/**
 * pvrdma_post_recv - post receive work request entries on a QP
 * @ibqp: the QP
 * @wr: the work request list to post
 * @bad_wr: the first bad WR returned
 *
 * @return: 0 on success, otherwise returns an errno.
 */
int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
                     const struct ib_recv_wr **bad_wr)
{
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        unsigned long flags;
        struct pvrdma_qp *qp = to_vqp(ibqp);
        struct pvrdma_rq_wqe_hdr *wqe_hdr;
        struct pvrdma_sge *sge;
        int ret = 0;
        int i;

        /*
         * In the RESET state, we can fail immediately. For other states,
         * just post and let the device figure it out.
         */
        if (qp->state == IB_QPS_RESET) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (qp->srq) {
                dev_warn(&dev->pdev->dev, "QP associated with SRQ\n");
                *bad_wr = wr;
                return -EINVAL;
        }

        spin_lock_irqsave(&qp->rq.lock, flags);

        while (wr) {
                unsigned int tail = 0;

                if (unlikely(wr->num_sge > qp->rq.max_sg ||
                             wr->num_sge < 0)) {
                        ret = -EINVAL;
                        *bad_wr = wr;
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "recv SGE overflow\n");
                        goto out;
                }

                if (unlikely(!pvrdma_idx_ring_has_space(
                                qp->rq.ring, qp->rq.wqe_cnt, &tail))) {
                        ret = -ENOMEM;
                        *bad_wr = wr;
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "recv queue full\n");
                        goto out;
                }

                wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail);
                wqe_hdr->wr_id = wr->wr_id;
                wqe_hdr->num_sge = wr->num_sge;
                wqe_hdr->total_len = 0;

                sge = (struct pvrdma_sge *)(wqe_hdr + 1);
                for (i = 0; i < wr->num_sge; i++) {
                        sge->addr = wr->sg_list[i].addr;
                        sge->length = wr->sg_list[i].length;
                        sge->lkey = wr->sg_list[i].lkey;
                        sge++;
                }

                /* Make sure wqe is written before index update */
                smp_wmb();

                /* Update shared rq ring */
                pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
                                    qp->rq.wqe_cnt);

                wr = wr->next;
        }

        spin_unlock_irqrestore(&qp->rq.lock, flags);

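        /* Notify the device that new receive WQEs are available. */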
        pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle);

        return ret;

out:
        spin_unlock_irqrestore(&qp->rq.lock, flags);

        return ret;
}

/**
 * pvrdma_query_qp - query a queue pair's attributes
 * @ibqp: the queue pair to query
 * @attr: the queue pair's attributes
 * @attr_mask: attributes mask
 * @init_attr: initial queue pair attributes
 *
 * @return: 0 on success, otherwise returns an errno.
 */
int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                    int attr_mask, struct ib_qp_init_attr *init_attr)
{
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        struct pvrdma_qp *qp = to_vqp(ibqp);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_query_qp *cmd = &req.query_qp;
        struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp;
        int ret = 0;

        mutex_lock(&qp->mutex);

        if (qp->state == IB_QPS_RESET) {
                attr->qp_state = IB_QPS_RESET;
                goto out;
        }

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP;
        cmd->qp_handle = qp->qp_handle;
        cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);

        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not query queuepair, error: %d\n", ret);
                goto out;
        }

        attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state);
        attr->cur_qp_state =
                pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state);
        attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu);
        attr->path_mig_state =
                pvrdma_mig_state_to_ib(resp->attrs.path_mig_state);
        attr->qkey = resp->attrs.qkey;
        attr->rq_psn = resp->attrs.rq_psn;
        attr->sq_psn = resp->attrs.sq_psn;
        attr->dest_qp_num = resp->attrs.dest_qp_num;
        attr->qp_access_flags =
                pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags);
        attr->pkey_index = resp->attrs.pkey_index;
        attr->alt_pkey_index = resp->attrs.alt_pkey_index;
        attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify;
        attr->sq_draining = resp->attrs.sq_draining;
        attr->max_rd_atomic = resp->attrs.max_rd_atomic;
        attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic;
        attr->min_rnr_timer = resp->attrs.min_rnr_timer;
        attr->port_num = resp->attrs.port_num;
        attr->timeout = resp->attrs.timeout;
        attr->retry_cnt = resp->attrs.retry_cnt;
        attr->rnr_retry = resp->attrs.rnr_retry;
        attr->alt_port_num = resp->attrs.alt_port_num;
        attr->alt_timeout = resp->attrs.alt_timeout;
        pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
        pvrdma_ah_attr_to_rdma(&attr->ah_attr, &resp->attrs.ah_attr);
        pvrdma_ah_attr_to_rdma(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);

        qp->state = attr->qp_state;

        ret = 0;

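        /*
         * init_attr is filled from the cached ibqp fields below, so it is
         * valid even on the RESET early-out path that skips the device query.
         */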
out:
        attr->cur_qp_state = attr->qp_state;

        init_attr->event_handler = qp->ibqp.event_handler;
        init_attr->qp_context = qp->ibqp.qp_context;
        init_attr->send_cq = qp->ibqp.send_cq;
        init_attr->recv_cq = qp->ibqp.recv_cq;
        init_attr->srq = qp->ibqp.srq;
        init_attr->xrcd = NULL;
        init_attr->cap = attr->cap;
        init_attr->sq_sig_type = 0;
        init_attr->qp_type = qp->ibqp.qp_type;
        init_attr->create_flags = 0;
        init_attr->port_num = qp->port;

        mutex_unlock(&qp->mutex);
        return ret;
}