linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include "pvrdma.h"

/**
 * pvrdma_req_notify_cq - request notification for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: notification flags
 *
 * @return: 0 for success; a positive value when IB_CQ_REPORT_MISSED_EVENTS
 * is set and completions are already pending on the CQ.
 */
int pvrdma_req_notify_cq(struct ib_cq *ibcq,
                         enum ib_cq_notify_flags notify_flags)
{
        struct pvrdma_dev *dev = to_vdev(ibcq->device);
        struct pvrdma_cq *cq = to_vcq(ibcq);
        u32 val = cq->cq_handle;
        unsigned long flags;
        int has_data = 0;

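        /*
         * Encode the requested arm mode (solicited-only or any completion)
         * alongside the CQ handle in the doorbell value.
         */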
        val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
                PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM;

        spin_lock_irqsave(&cq->cq_lock, flags);

        pvrdma_write_uar_cq(dev, val);

        if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
                unsigned int head;

                has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
                                                    cq->ibcq.cqe, &head);
                if (unlikely(has_data == PVRDMA_INVALID_IDX))
                        dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
        }

        spin_unlock_irqrestore(&cq->cq_lock, flags);

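        /*
         * With IB_CQ_REPORT_MISSED_EVENTS, a positive return tells the
         * caller that completions were already pending when the CQ was
         * re-armed, so it should poll again rather than wait for an event.
         */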
        return has_data;
}

/**
 * pvrdma_create_cq - create completion queue
 * @ibcq: Allocated CQ
 * @attr: completion queue attributes
 * @udata: user data
 *
 * @return: 0 on success
 */
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                     struct ib_udata *udata)
{
        struct ib_device *ibdev = ibcq->device;
        int entries = attr->cqe;
        struct pvrdma_dev *dev = to_vdev(ibdev);
        struct pvrdma_cq *cq = to_vcq(ibcq);
        int ret;
        int npages;
        unsigned long flags;
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
        struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
        struct pvrdma_create_cq_resp cq_resp = {};
        struct pvrdma_create_cq ucmd;
        struct pvrdma_ucontext *context = rdma_udata_to_drv_context(
                udata, struct pvrdma_ucontext, ibucontext);

        BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);

        if (attr->flags)
                return -EOPNOTSUPP;

        entries = roundup_pow_of_two(entries);
        if (entries < 1 || entries > dev->dsr->caps.max_cqe)
                return -EINVAL;

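        /* Enforce the device limit on the number of CQs. */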
        if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
                return -ENOMEM;

        cq->ibcq.cqe = entries;
        cq->is_kernel = !udata;

        if (!cq->is_kernel) {
                if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
                        ret = -EFAULT;
                        goto err_cq;
                }

                cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, ucmd.buf_size,
                                       IB_ACCESS_LOCAL_WRITE);
                if (IS_ERR(cq->umem)) {
                        ret = PTR_ERR(cq->umem);
                        goto err_cq;
                }

                npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE);
        } else {
                /* One extra page for shared ring state */
                npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
                              PAGE_SIZE - 1) / PAGE_SIZE;

                /* Skip header page. */
                cq->offset = PAGE_SIZE;
        }

        if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
                dev_warn(&dev->pdev->dev,
                         "overflow pages in completion queue\n");
                ret = -EINVAL;
                goto err_umem;
        }

        ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel);
        if (ret) {
                dev_warn(&dev->pdev->dev,
                         "could not allocate page directory\n");
                goto err_umem;
        }

        /* Ring state is always the first page. Set in library for user cq. */
        if (cq->is_kernel)
                cq->ring_state = cq->pdir.pages[0];
        else
                pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);

        refcount_set(&cq->refcnt, 1);
        init_completion(&cq->free);
        spin_lock_init(&cq->cq_lock);

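        /* Build the CREATE_CQ command and post it to the device. */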
        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
        cmd->nchunks = npages;
        cmd->ctx_handle = context ? context->ctx_handle : 0;
        cmd->cqe = entries;
        cmd->pdir_dma = cq->pdir.dir_dma;
        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not create completion queue, error: %d\n", ret);
                goto err_page_dir;
        }

        cq->ibcq.cqe = resp->cqe;
        cq->cq_handle = resp->cq_handle;
        cq_resp.cqn = resp->cq_handle;
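        /*
         * Register the CQ in the device's CQ table so completion events
         * can be routed back to it by handle.
         */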
        spin_lock_irqsave(&dev->cq_tbl_lock, flags);
        dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
        spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

        if (!cq->is_kernel) {
                cq->uar = &context->uar;

                /* Copy udata back. */
                if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) {
                        dev_warn(&dev->pdev->dev,
                                 "failed to copy back udata\n");
                        pvrdma_destroy_cq(&cq->ibcq, udata);
                        return -EINVAL;
                }
        }

        return 0;

err_page_dir:
        pvrdma_page_dir_cleanup(dev, &cq->pdir);
err_umem:
        ib_umem_release(cq->umem);
err_cq:
        atomic_dec(&dev->num_cqs);
        return ret;
}

static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
{
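        /*
         * Drop the reference taken at creation and wait until all other
         * users of the CQ have released theirs before tearing down its
         * memory.
         */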
        if (refcount_dec_and_test(&cq->refcnt))
                complete(&cq->free);
        wait_for_completion(&cq->free);

        ib_umem_release(cq->umem);

        pvrdma_page_dir_cleanup(dev, &cq->pdir);
}

/**
 * pvrdma_destroy_cq - destroy completion queue
 * @cq: the completion queue to destroy.
 * @udata: user data or null for kernel object
 */
int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
        struct pvrdma_cq *vcq = to_vcq(cq);
        union pvrdma_cmd_req req;
        struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq;
        struct pvrdma_dev *dev = to_vdev(cq->device);
        unsigned long flags;
        int ret;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ;
        cmd->cq_handle = vcq->cq_handle;

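        /* Tell the device to destroy the CQ; a failure is only logged. */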
        ret = pvrdma_cmd_post(dev, &req, NULL, 0);
        if (ret < 0)
                dev_warn(&dev->pdev->dev,
                         "could not destroy completion queue, error: %d\n",
                         ret);

        /* free cq's resources */
        spin_lock_irqsave(&dev->cq_tbl_lock, flags);
        dev->cq_tbl[vcq->cq_handle] = NULL;
        spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

        pvrdma_free_cq(dev, vcq);
        atomic_dec(&dev->num_cqs);
        return 0;
}

static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
{
        return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
                                        &cq->pdir,
                                        cq->offset +
                                        sizeof(struct pvrdma_cqe) * i);
}

void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq)
{
        unsigned int head;
        int has_data;

        if (!cq->is_kernel)
                return;

        /* Lock held */
        has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
                                            cq->ibcq.cqe, &head);
        if (unlikely(has_data > 0)) {
                int items;
                int curr;
                int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail,
                                      cq->ibcq.cqe);
                struct pvrdma_cqe *cqe;
                struct pvrdma_cqe *curr_cqe;

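                /*
                 * Walk the outstanding CQEs from newest to oldest: entries
                 * belonging to other QPs are compacted toward the tail of
                 * the ring, while entries for the QP being flushed are
                 * dropped by advancing the consumer index.
                 */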
                items = (tail > head) ? (tail - head) :
                        (cq->ibcq.cqe - head + tail);
                curr = --tail;
                while (items-- > 0) {
                        if (curr < 0)
                                curr = cq->ibcq.cqe - 1;
                        if (tail < 0)
                                tail = cq->ibcq.cqe - 1;
                        curr_cqe = get_cqe(cq, curr);
                        if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) {
                                if (curr != tail) {
                                        cqe = get_cqe(cq, tail);
                                        *cqe = *curr_cqe;
                                }
                                tail--;
                        } else {
                                pvrdma_idx_ring_inc(
                                        &cq->ring_state->rx.cons_head,
                                        cq->ibcq.cqe);
                        }
                        curr--;
                }
        }
}

static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp,
                           struct ib_wc *wc)
{
        struct pvrdma_dev *dev = to_vdev(cq->ibcq.device);
        int has_data;
        unsigned int head;
        bool tried = false;
        struct pvrdma_cqe *cqe;

retry:
        has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
                                            cq->ibcq.cqe, &head);
        if (has_data == 0) {
                if (tried)
                        return -EAGAIN;

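                /*
                 * No CQE is visible yet; ring the POLL doorbell once to ask
                 * the device to post any pending completions, then check
                 * the ring again.
                 */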
                pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL);

                tried = true;
                goto retry;
        } else if (has_data == PVRDMA_INVALID_IDX) {
                dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
                return -EAGAIN;
        }

        cqe = get_cqe(cq, head);

        /* Ensure cqe is valid. */
        rmb();
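        /* Stop if the CQE refers to a QP that has already been destroyed. */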
        if (dev->qp_tbl[cqe->qp & 0xffff])
                *cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff];
        else
                return -EAGAIN;

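        /* Translate the device CQE into the ib_wc the consumer expects. */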
        wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode);
        wc->status = pvrdma_wc_status_to_ib(cqe->status);
        wc->wr_id = cqe->wr_id;
        wc->qp = &(*cur_qp)->ibqp;
        wc->byte_len = cqe->byte_len;
        wc->ex.imm_data = cqe->imm_data;
        wc->src_qp = cqe->src_qp;
        wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags);
        wc->pkey_index = cqe->pkey_index;
        wc->slid = cqe->slid;
        wc->sl = cqe->sl;
        wc->dlid_path_bits = cqe->dlid_path_bits;
        wc->port_num = cqe->port_num;
        wc->vendor_err = cqe->vendor_err;
        wc->network_hdr_type = pvrdma_network_type_to_ib(cqe->network_hdr_type);

        /* Update shared ring state */
        pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);

        return 0;
}

/**
 * pvrdma_poll_cq - poll for work completion queue entries
 * @ibcq: completion queue
 * @num_entries: the maximum number of entries
 * @wc: pointer to work completion array
 *
 * @return: number of polled completion entries
 */
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        struct pvrdma_cq *cq = to_vcq(ibcq);
        struct pvrdma_qp *cur_qp = NULL;
        unsigned long flags;
        int npolled;

        if (num_entries < 1 || wc == NULL)
                return 0;

        spin_lock_irqsave(&cq->cq_lock, flags);
        for (npolled = 0; npolled < num_entries; ++npolled) {
                if (pvrdma_poll_one(cq, &cur_qp, wc + npolled))
                        break;
        }

        spin_unlock_irqrestore(&cq->cq_lock, flags);

        /* Ensure we do not return errors from poll_cq */
        return npolled;
}