linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>

#include "pvrdma.h"

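/* Fetch the send and recv completion queues backing a QP. */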
static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
                           struct pvrdma_cq **recv_cq)
{
        *send_cq = to_vcq(qp->ibqp.send_cq);
        *recv_cq = to_vcq(qp->ibqp.recv_cq);
}

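/*
 * Lock both CQs of a QP in a consistent order (lowest handle first) to
 * avoid ABBA deadlocks; a CQ shared by both queues is locked only once.
 */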
static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
                            unsigned long *scq_flags,
                            unsigned long *rcq_flags)
        __acquires(scq->cq_lock) __acquires(rcq->cq_lock)
{
        if (scq == rcq) {
                spin_lock_irqsave(&scq->cq_lock, *scq_flags);
                __acquire(rcq->cq_lock);
        } else if (scq->cq_handle < rcq->cq_handle) {
                spin_lock_irqsave(&scq->cq_lock, *scq_flags);
                spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags,
                                         SINGLE_DEPTH_NESTING);
        } else {
                spin_lock_irqsave(&rcq->cq_lock, *rcq_flags);
                spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags,
                                         SINGLE_DEPTH_NESTING);
        }
}

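/* Drop the CQ locks taken by pvrdma_lock_cqs(), in reverse order. */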
static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
                              unsigned long *scq_flags,
                              unsigned long *rcq_flags)
        __releases(scq->cq_lock) __releases(rcq->cq_lock)
{
        if (scq == rcq) {
                __release(rcq->cq_lock);
                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
        } else if (scq->cq_handle < rcq->cq_handle) {
                spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
        } else {
                spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
                spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
        }
}

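/*
 * Flush any completions still queued for this QP and, for kernel QPs,
 * rewind the send/recv ring state back to empty.
 */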
static void pvrdma_reset_qp(struct pvrdma_qp *qp)
{
        struct pvrdma_cq *scq, *rcq;
        unsigned long scq_flags, rcq_flags;

        /* Clean up cqes */
        get_cqs(qp, &scq, &rcq);
        pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        _pvrdma_flush_cqe(qp, scq);
        if (scq != rcq)
                _pvrdma_flush_cqe(qp, rcq);

        pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        /*
         * Reset queuepair. The checks are because usermode queuepairs won't
         * have kernel ringstates.
         */
        if (qp->rq.ring) {
                atomic_set(&qp->rq.ring->cons_head, 0);
                atomic_set(&qp->rq.ring->prod_tail, 0);
        }
        if (qp->sq.ring) {
                atomic_set(&qp->sq.ring->cons_head, 0);
                atomic_set(&qp->sq.ring->prod_tail, 0);
        }
}

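/*
 * Validate the requested recv queue size against device limits, round the
 * WQE and SGE counts up to powers of two, write the granted values back
 * into @req_cap and compute the number of recv queue pages.
 */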
static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
                              struct ib_qp_cap *req_cap,
                              struct pvrdma_qp *qp)
{
        if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr ||
            req_cap->max_recv_sge > dev->dsr->caps.max_sge) {
                dev_warn(&dev->pdev->dev, "recv queue size invalid\n");
                return -EINVAL;
        }

        qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr));
        qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge));

        /* Write back */
        req_cap->max_recv_wr = qp->rq.wqe_cnt;
        req_cap->max_recv_sge = qp->rq.max_sg;

        qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) +
                                             sizeof(struct pvrdma_sge) *
                                             qp->rq.max_sg);
        qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) /
                          PAGE_SIZE;

        return 0;
}

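/*
 * Validate and round up the requested send queue size, write the granted
 * values back into @req_cap and compute the number of send queue pages,
 * including the extra header page(s).
 */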
static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
                              struct pvrdma_qp *qp)
{
        if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
            req_cap->max_send_sge > dev->dsr->caps.max_sge) {
                dev_warn(&dev->pdev->dev, "send queue size invalid\n");
                return -EINVAL;
        }

        qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr));
        qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge));

        /* Write back */
        req_cap->max_send_wr = qp->sq.wqe_cnt;
        req_cap->max_send_sge = qp->sq.max_sg;

        qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) +
                                             sizeof(struct pvrdma_sge) *
                                             qp->sq.max_sg);
        /* Note: one extra page for the header. */
        qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES +
                          (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) /
                                                                PAGE_SIZE;

        return 0;
}

/**
 * pvrdma_create_qp - create queue pair
 * @pd: protection domain
 * @init_attr: queue pair attributes
 * @udata: user data
 *
 * @return: the ib_qp pointer on success, otherwise an ERR_PTR-encoded errno.
 */
struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
                               struct ib_qp_init_attr *init_attr,
                               struct ib_udata *udata)
{
        struct pvrdma_qp *qp = NULL;
        struct pvrdma_dev *dev = to_vdev(pd->device);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
        struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
        struct pvrdma_create_qp ucmd;
        unsigned long flags;
        int ret;
        bool is_srq = !!init_attr->srq;

        if (init_attr->create_flags) {
                dev_warn(&dev->pdev->dev,
                         "invalid create queuepair flags %#x\n",
                         init_attr->create_flags);
                return ERR_PTR(-EINVAL);
        }

        if (init_attr->qp_type != IB_QPT_RC &&
            init_attr->qp_type != IB_QPT_UD &&
            init_attr->qp_type != IB_QPT_GSI) {
                dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
                         init_attr->qp_type);
                return ERR_PTR(-EINVAL);
        }

        if (is_srq && !dev->dsr->caps.max_srq) {
                dev_warn(&dev->pdev->dev,
                         "SRQs not supported by device\n");
                return ERR_PTR(-EINVAL);
        }

        if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
                return ERR_PTR(-ENOMEM);

        switch (init_attr->qp_type) {
        case IB_QPT_GSI:
                if (init_attr->port_num == 0 ||
                    init_attr->port_num > pd->device->phys_port_cnt ||
                    udata) {
                        dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
                        ret = -EINVAL;
                        goto err_qp;
                }
                /* fall through */
        case IB_QPT_RC:
        case IB_QPT_UD:
                qp = kzalloc(sizeof(*qp), GFP_KERNEL);
                if (!qp) {
                        ret = -ENOMEM;
                        goto err_qp;
                }

                spin_lock_init(&qp->sq.lock);
                spin_lock_init(&qp->rq.lock);
                mutex_init(&qp->mutex);
                refcount_set(&qp->refcnt, 1);
                init_completion(&qp->free);

                qp->state = IB_QPS_RESET;
                qp->is_kernel = !udata;

                if (!qp->is_kernel) {
                        dev_dbg(&dev->pdev->dev,
                                "create queuepair from user space\n");

                        if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
                                ret = -EFAULT;
                                goto err_qp;
                        }

                        if (!is_srq) {
                                /* Pin and map the userspace recv queue. */
                                qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr,
                                                        ucmd.rbuf_size, 0, 0);
                                if (IS_ERR(qp->rumem)) {
                                        ret = PTR_ERR(qp->rumem);
                                        goto err_qp;
                                }
                                qp->srq = NULL;
                        } else {
                                qp->rumem = NULL;
                                qp->srq = to_vsrq(init_attr->srq);
                        }

                        qp->sumem = ib_umem_get(udata, ucmd.sbuf_addr,
                                                ucmd.sbuf_size, 0, 0);
                        if (IS_ERR(qp->sumem)) {
                                if (!is_srq)
                                        ib_umem_release(qp->rumem);
                                ret = PTR_ERR(qp->sumem);
                                goto err_qp;
                        }

                        qp->npages_send = ib_umem_page_count(qp->sumem);
                        if (!is_srq)
                                qp->npages_recv = ib_umem_page_count(qp->rumem);
                        else
                                qp->npages_recv = 0;
                        qp->npages = qp->npages_send + qp->npages_recv;
                } else {
                        ret = pvrdma_set_sq_size(to_vdev(pd->device),
                                                 &init_attr->cap, qp);
                        if (ret)
                                goto err_qp;

                        ret = pvrdma_set_rq_size(to_vdev(pd->device),
                                                 &init_attr->cap, qp);
                        if (ret)
                                goto err_qp;

                        qp->npages = qp->npages_send + qp->npages_recv;

                        /* Skip header page. */
                        qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE;

                        /* Recv queue pages are after send pages. */
                        qp->rq.offset = qp->npages_send * PAGE_SIZE;
                }

                if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
                        dev_warn(&dev->pdev->dev,
                                 "overflow pages in queuepair\n");
                        ret = -EINVAL;
                        goto err_umem;
                }

                ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages,
                                           qp->is_kernel);
                if (ret) {
                        dev_warn(&dev->pdev->dev,
                                 "could not allocate page directory\n");
                        goto err_umem;
                }

                if (!qp->is_kernel) {
                        pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0);
                        if (!is_srq)
                                pvrdma_page_dir_insert_umem(&qp->pdir,
                                                            qp->rumem,
                                                            qp->npages_send);
                } else {
                        /* Ring state is always the first page. */
                        qp->sq.ring = qp->pdir.pages[0];
                        qp->rq.ring = is_srq ? NULL : &qp->sq.ring[1];
                }
                break;
        default:
                ret = -EINVAL;
                goto err_qp;
        }

        /* Not supported */
        init_attr->cap.max_inline_data = 0;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
        cmd->pd_handle = to_vpd(pd)->pd_handle;
        cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
        cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
        if (is_srq)
                cmd->srq_handle = to_vsrq(init_attr->srq)->srq_handle;
        else
                cmd->srq_handle = 0;
        cmd->max_send_wr = init_attr->cap.max_send_wr;
        cmd->max_recv_wr = init_attr->cap.max_recv_wr;
        cmd->max_send_sge = init_attr->cap.max_send_sge;
        cmd->max_recv_sge = init_attr->cap.max_recv_sge;
        cmd->max_inline_data = init_attr->cap.max_inline_data;
        cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
        cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
        cmd->is_srq = is_srq;
        cmd->lkey = 0;
        cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
        cmd->total_chunks = qp->npages;
        cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES;
        cmd->pdir_dma = qp->pdir.dir_dma;

        dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
                cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge,
                cmd->max_recv_sge);

        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not create queuepair, error: %d\n", ret);
                goto err_pdir;
        }

        /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
        qp->qp_handle = resp->qpn;
        qp->port = init_attr->port_num;
        qp->ibqp.qp_num = resp->qpn;
        spin_lock_irqsave(&dev->qp_tbl_lock, flags);
        dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
        spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

        return &qp->ibqp;

err_pdir:
        pvrdma_page_dir_cleanup(dev, &qp->pdir);
err_umem:
        ib_umem_release(qp->rumem);
        ib_umem_release(qp->sumem);
err_qp:
        kfree(qp);
        atomic_dec(&dev->num_qps);

        return ERR_PTR(ret);
}

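/*
 * Tear down a QP: flush its CQEs, remove it from the QP table, wait for
 * the last reference to drop, then release user memory, the page
 * directory and the QP itself.
 */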
static void pvrdma_free_qp(struct pvrdma_qp *qp)
{
        struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);
        struct pvrdma_cq *scq;
        struct pvrdma_cq *rcq;
        unsigned long flags, scq_flags, rcq_flags;

        /* In case cq is polling */
        get_cqs(qp, &scq, &rcq);
        pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        _pvrdma_flush_cqe(qp, scq);
        if (scq != rcq)
                _pvrdma_flush_cqe(qp, rcq);

        spin_lock_irqsave(&dev->qp_tbl_lock, flags);
        dev->qp_tbl[qp->qp_handle] = NULL;
        spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

        pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);

        if (refcount_dec_and_test(&qp->refcnt))
                complete(&qp->free);
        wait_for_completion(&qp->free);

        ib_umem_release(qp->rumem);
        ib_umem_release(qp->sumem);

        pvrdma_page_dir_cleanup(dev, &qp->pdir);

        kfree(qp);

        atomic_dec(&dev->num_qps);
}

/**
 * pvrdma_destroy_qp - destroy a queue pair
 * @qp: the queue pair to destroy
 * @udata: user data or null for kernel object
 *
 * @return: 0 on success.
 */
int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
        struct pvrdma_qp *vqp = to_vqp(qp);
        union pvrdma_cmd_req req;
        struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
        int ret;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
        cmd->qp_handle = vqp->qp_handle;

        ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0);
        if (ret < 0)
                dev_warn(&to_vdev(qp->device)->pdev->dev,
                         "destroy queuepair failed, error: %d\n", ret);

        pvrdma_free_qp(vqp);

        return 0;
}

/**
 * pvrdma_modify_qp - modify queue pair attributes
 * @ibqp: the queue pair
 * @attr: the new queue pair's attributes
 * @attr_mask: attributes mask
 * @udata: user data
 *
 * @returns 0 on success, otherwise returns an errno.
 */
int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                     int attr_mask, struct ib_udata *udata)
{
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        struct pvrdma_qp *qp = to_vqp(ibqp);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
        enum ib_qp_state cur_state, next_state;
        int ret;

        /* Sanity checking. The QP mutex serializes state checks and changes. */
        mutex_lock(&qp->mutex);
        cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
                qp->state;
        next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;

        if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
                                attr_mask)) {
                ret = -EINVAL;
                goto out;
        }

        if (attr_mask & IB_QP_PORT) {
                if (attr->port_num == 0 ||
                    attr->port_num > ibqp->device->phys_port_cnt) {
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (attr_mask & IB_QP_MIN_RNR_TIMER) {
                if (attr->min_rnr_timer > 31) {
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (attr_mask & IB_QP_PKEY_INDEX) {
                if (attr->pkey_index >= dev->dsr->caps.max_pkeys) {
                        ret = -EINVAL;
                        goto out;
                }
        }

        if (attr_mask & IB_QP_QKEY)
                qp->qkey = attr->qkey;

        if (cur_state == next_state && cur_state == IB_QPS_RESET) {
                ret = 0;
                goto out;
        }

        qp->state = next_state;
        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP;
        cmd->qp_handle = qp->qp_handle;
        cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
        cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state);
        cmd->attrs.cur_qp_state =
                ib_qp_state_to_pvrdma(attr->cur_qp_state);
        cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu);
        cmd->attrs.path_mig_state =
                ib_mig_state_to_pvrdma(attr->path_mig_state);
        cmd->attrs.qkey = attr->qkey;
        cmd->attrs.rq_psn = attr->rq_psn;
        cmd->attrs.sq_psn = attr->sq_psn;
        cmd->attrs.dest_qp_num = attr->dest_qp_num;
        cmd->attrs.qp_access_flags =
                ib_access_flags_to_pvrdma(attr->qp_access_flags);
        cmd->attrs.pkey_index = attr->pkey_index;
        cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
        cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
        cmd->attrs.sq_draining = attr->sq_draining;
        cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
        cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
        cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
        cmd->attrs.port_num = attr->port_num;
        cmd->attrs.timeout = attr->timeout;
        cmd->attrs.retry_cnt = attr->retry_cnt;
        cmd->attrs.rnr_retry = attr->rnr_retry;
        cmd->attrs.alt_port_num = attr->alt_port_num;
        cmd->attrs.alt_timeout = attr->alt_timeout;
        ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
        rdma_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
        rdma_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);

        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not modify queuepair, error: %d\n", ret);
        } else if (rsp.hdr.err > 0) {
                dev_warn(&dev->pdev->dev,
                         "cannot modify queuepair, error: %d\n", rsp.hdr.err);
                ret = -EINVAL;
        }

        if (ret == 0 && next_state == IB_QPS_RESET)
                pvrdma_reset_qp(qp);

out:
        mutex_unlock(&qp->mutex);

        return ret;
}

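/* Return a pointer to send WQE slot @n in the QP's page directory. */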
static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n)
{
        return pvrdma_page_dir_get_ptr(&qp->pdir,
                                       qp->sq.offset + n * qp->sq.wqe_size);
}

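/* Return a pointer to recv WQE slot @n in the QP's page directory. */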
static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
{
        return pvrdma_page_dir_get_ptr(&qp->pdir,
                                       qp->rq.offset + n * qp->rq.wqe_size);
}

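/*
 * Fill the fast-register fields of a send WQE header from an IB_WR_REG_MR
 * work request and hand the MR's page list to its page directory.
 */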
static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr,
                       const struct ib_reg_wr *wr)
{
        struct pvrdma_user_mr *mr = to_vmr(wr->mr);

        wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova;
        wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma;
        wqe_hdr->wr.fast_reg.page_shift = mr->page_shift;
        wqe_hdr->wr.fast_reg.page_list_len = mr->npages;
        wqe_hdr->wr.fast_reg.length = mr->ibmr.length;
        wqe_hdr->wr.fast_reg.access_flags = wr->access;
        wqe_hdr->wr.fast_reg.rkey = wr->key;

        return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages,
                                                mr->npages);
}

/**
 * pvrdma_post_send - post send work request entries on a QP
 * @ibqp: the QP
 * @wr: work request list to post
 * @bad_wr: the first bad WR returned
 *
 * @return: 0 on success, otherwise errno returned.
 */
int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                     const struct ib_send_wr **bad_wr)
{
        struct pvrdma_qp *qp = to_vqp(ibqp);
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        unsigned long flags;
        struct pvrdma_sq_wqe_hdr *wqe_hdr;
        struct pvrdma_sge *sge;
        int i, ret;

        /*
         * In states lower than RTS, we can fail immediately. In other states,
         * just post and let the device figure it out.
         */
        if (qp->state < IB_QPS_RTS) {
                *bad_wr = wr;
                return -EINVAL;
        }

        spin_lock_irqsave(&qp->sq.lock, flags);

        while (wr) {
                unsigned int tail = 0;

                if (unlikely(!pvrdma_idx_ring_has_space(
                                qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "send queue is full\n");
                        *bad_wr = wr;
                        ret = -ENOMEM;
                        goto out;
                }

                if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "send SGE overflow\n");
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                }

                if (unlikely(wr->opcode < 0)) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "invalid send opcode\n");
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                }

                /*
                 * Only support UD, RC.
                 * Need to check opcode table for thorough checking.
                 * opcode               _UD     _UC     _RC
                 * _SEND                x       x       x
                 * _SEND_WITH_IMM       x       x       x
                 * _RDMA_WRITE                  x       x
                 * _RDMA_WRITE_WITH_IMM         x       x
                 * _LOCAL_INV                   x       x
                 * _SEND_WITH_INV               x       x
                 * _RDMA_READ                           x
                 * _ATOMIC_CMP_AND_SWP                  x
                 * _ATOMIC_FETCH_AND_ADD                x
                 * _MASK_ATOMIC_CMP_AND_SWP             x
                 * _MASK_ATOMIC_FETCH_AND_ADD           x
                 * _REG_MR                              x
                 *
                 */
                if (qp->ibqp.qp_type != IB_QPT_UD &&
                    qp->ibqp.qp_type != IB_QPT_RC &&
                    wr->opcode != IB_WR_SEND) {
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "unsupported queuepair type\n");
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                } else if (qp->ibqp.qp_type == IB_QPT_UD ||
                           qp->ibqp.qp_type == IB_QPT_GSI) {
                        if (wr->opcode != IB_WR_SEND &&
                            wr->opcode != IB_WR_SEND_WITH_IMM) {
                                dev_warn_ratelimited(&dev->pdev->dev,
                                                     "invalid send opcode\n");
                                *bad_wr = wr;
                                ret = -EINVAL;
                                goto out;
                        }
                }

                wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail);
                memset(wqe_hdr, 0, sizeof(*wqe_hdr));
                wqe_hdr->wr_id = wr->wr_id;
                wqe_hdr->num_sge = wr->num_sge;
                wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode);
                wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags);
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
                        wqe_hdr->ex.imm_data = wr->ex.imm_data;

                if (unlikely(wqe_hdr->opcode == PVRDMA_WR_ERROR)) {
                        *bad_wr = wr;
                        ret = -EINVAL;
                        goto out;
                }

                switch (qp->ibqp.qp_type) {
                case IB_QPT_GSI:
                case IB_QPT_UD:
                        if (unlikely(!ud_wr(wr)->ah)) {
                                dev_warn_ratelimited(&dev->pdev->dev,
                                                     "invalid address handle\n");
                                *bad_wr = wr;
                                ret = -EINVAL;
                                goto out;
                        }

                        /*
                         * Use qkey from qp context if high order bit set,
                         * otherwise from work request.
                         */
                        wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn;
                        wqe_hdr->wr.ud.remote_qkey =
                                ud_wr(wr)->remote_qkey & 0x80000000 ?
                                qp->qkey : ud_wr(wr)->remote_qkey;
                        wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av;

                        break;
                case IB_QPT_RC:
                        switch (wr->opcode) {
                        case IB_WR_RDMA_READ:
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
                                wqe_hdr->wr.rdma.remote_addr =
                                        rdma_wr(wr)->remote_addr;
                                wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey;
                                break;
                        case IB_WR_LOCAL_INV:
                        case IB_WR_SEND_WITH_INV:
                                wqe_hdr->ex.invalidate_rkey =
                                        wr->ex.invalidate_rkey;
                                break;
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
                                wqe_hdr->wr.atomic.remote_addr =
                                        atomic_wr(wr)->remote_addr;
                                wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey;
                                wqe_hdr->wr.atomic.compare_add =
                                        atomic_wr(wr)->compare_add;
                                if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP)
                                        wqe_hdr->wr.atomic.swap =
                                                atomic_wr(wr)->swap;
                                break;
                        case IB_WR_REG_MR:
                                ret = set_reg_seg(wqe_hdr, reg_wr(wr));
                                if (ret < 0) {
                                        dev_warn_ratelimited(&dev->pdev->dev,
                                                             "Failed to set fast register work request\n");
                                        *bad_wr = wr;
                                        goto out;
                                }
                                break;
                        default:
                                break;
                        }

                        break;
                default:
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "invalid queuepair type\n");
                        ret = -EINVAL;
                        *bad_wr = wr;
                        goto out;
                }

                sge = (struct pvrdma_sge *)(wqe_hdr + 1);
                for (i = 0; i < wr->num_sge; i++) {
                        /* Need to check wqe_size 0 or max size */
                        sge->addr = wr->sg_list[i].addr;
                        sge->length = wr->sg_list[i].length;
                        sge->lkey = wr->sg_list[i].lkey;
                        sge++;
                }

                /* Make sure wqe is written before index update */
                smp_wmb();

                /* Update shared sq ring */
                pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
                                    qp->sq.wqe_cnt);

                wr = wr->next;
        }

        ret = 0;

out:
        spin_unlock_irqrestore(&qp->sq.lock, flags);

        if (!ret)
                pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle);

        return ret;
}

/**
 * pvrdma_post_recv - post receive work request entries on a QP
 * @ibqp: the QP
 * @wr: the work request list to post
 * @bad_wr: the first bad WR returned
 *
 * @return: 0 on success, otherwise errno returned.
 */
int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
                     const struct ib_recv_wr **bad_wr)
{
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        unsigned long flags;
        struct pvrdma_qp *qp = to_vqp(ibqp);
        struct pvrdma_rq_wqe_hdr *wqe_hdr;
        struct pvrdma_sge *sge;
        int ret = 0;
        int i;

        /*
         * In the RESET state, we can fail immediately. For other states,
         * just post and let the device figure it out.
         */
        if (qp->state == IB_QPS_RESET) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (qp->srq) {
                dev_warn(&dev->pdev->dev, "QP associated with SRQ\n");
                *bad_wr = wr;
                return -EINVAL;
        }

        spin_lock_irqsave(&qp->rq.lock, flags);

        while (wr) {
                unsigned int tail = 0;

                if (unlikely(wr->num_sge > qp->rq.max_sg ||
                             wr->num_sge < 0)) {
                        ret = -EINVAL;
                        *bad_wr = wr;
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "recv SGE overflow\n");
                        goto out;
                }

                if (unlikely(!pvrdma_idx_ring_has_space(
                                qp->rq.ring, qp->rq.wqe_cnt, &tail))) {
                        ret = -ENOMEM;
                        *bad_wr = wr;
                        dev_warn_ratelimited(&dev->pdev->dev,
                                             "recv queue full\n");
                        goto out;
                }

                wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail);
                wqe_hdr->wr_id = wr->wr_id;
                wqe_hdr->num_sge = wr->num_sge;
                wqe_hdr->total_len = 0;

                sge = (struct pvrdma_sge *)(wqe_hdr + 1);
                for (i = 0; i < wr->num_sge; i++) {
                        sge->addr = wr->sg_list[i].addr;
                        sge->length = wr->sg_list[i].length;
                        sge->lkey = wr->sg_list[i].lkey;
                        sge++;
                }

                /* Make sure wqe is written before index update */
                smp_wmb();

                /* Update shared rq ring */
                pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
                                    qp->rq.wqe_cnt);

                wr = wr->next;
        }

        spin_unlock_irqrestore(&qp->rq.lock, flags);

        pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle);

        return ret;

out:
        spin_unlock_irqrestore(&qp->rq.lock, flags);

        return ret;
}

/**
 * pvrdma_query_qp - query a queue pair's attributes
 * @ibqp: the queue pair to query
 * @attr: the queue pair's attributes
 * @attr_mask: attributes mask
 * @init_attr: initial queue pair attributes
 *
 * @returns 0 on success, otherwise returns an errno.
 */
int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                    int attr_mask, struct ib_qp_init_attr *init_attr)
{
        struct pvrdma_dev *dev = to_vdev(ibqp->device);
        struct pvrdma_qp *qp = to_vqp(ibqp);
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_query_qp *cmd = &req.query_qp;
        struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp;
        int ret = 0;

        mutex_lock(&qp->mutex);

        if (qp->state == IB_QPS_RESET) {
                attr->qp_state = IB_QPS_RESET;
                goto out;
        }

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP;
        cmd->qp_handle = qp->qp_handle;
        cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);

        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not query queuepair, error: %d\n", ret);
                goto out;
        }

        attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state);
        attr->cur_qp_state =
                pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state);
        attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu);
        attr->path_mig_state =
                pvrdma_mig_state_to_ib(resp->attrs.path_mig_state);
        attr->qkey = resp->attrs.qkey;
        attr->rq_psn = resp->attrs.rq_psn;
        attr->sq_psn = resp->attrs.sq_psn;
        attr->dest_qp_num = resp->attrs.dest_qp_num;
        attr->qp_access_flags =
                pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags);
        attr->pkey_index = resp->attrs.pkey_index;
        attr->alt_pkey_index = resp->attrs.alt_pkey_index;
        attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify;
        attr->sq_draining = resp->attrs.sq_draining;
        attr->max_rd_atomic = resp->attrs.max_rd_atomic;
        attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic;
        attr->min_rnr_timer = resp->attrs.min_rnr_timer;
        attr->port_num = resp->attrs.port_num;
        attr->timeout = resp->attrs.timeout;
        attr->retry_cnt = resp->attrs.retry_cnt;
        attr->rnr_retry = resp->attrs.rnr_retry;
        attr->alt_port_num = resp->attrs.alt_port_num;
        attr->alt_timeout = resp->attrs.alt_timeout;
        pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
        pvrdma_ah_attr_to_rdma(&attr->ah_attr, &resp->attrs.ah_attr);
        pvrdma_ah_attr_to_rdma(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);

        qp->state = attr->qp_state;

        ret = 0;

out:
        attr->cur_qp_state = attr->qp_state;

        init_attr->event_handler = qp->ibqp.event_handler;
        init_attr->qp_context = qp->ibqp.qp_context;
        init_attr->send_cq = qp->ibqp.send_cq;
        init_attr->recv_cq = qp->ibqp.recv_cq;
        init_attr->srq = qp->ibqp.srq;
        init_attr->xrcd = NULL;
        init_attr->cap = attr->cap;
        init_attr->sq_sig_type = 0;
        init_attr->qp_type = qp->ibqp.qp_type;
        init_attr->create_flags = 0;
        init_attr->port_num = qp->port;

        mutex_unlock(&qp->mutex);
        return ret;
}