linux/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include "pvrdma.h"

/**
 * pvrdma_req_notify_cq - request notification for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: notification flags
 *
 * @return: 0 for success, or a positive value if IB_CQ_REPORT_MISSED_EVENTS
 * was requested and completions were already pending on the CQ.
 */
int pvrdma_req_notify_cq(struct ib_cq *ibcq,
                         enum ib_cq_notify_flags notify_flags)
{
        struct pvrdma_dev *dev = to_vdev(ibcq->device);
        struct pvrdma_cq *cq = to_vcq(ibcq);
        u32 val = cq->cq_handle;
        unsigned long flags;
        int has_data = 0;

        val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
                PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM;

        spin_lock_irqsave(&cq->cq_lock, flags);

        pvrdma_write_uar_cq(dev, val);

        if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
                unsigned int head;

                has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
                                                    cq->ibcq.cqe, &head);
                if (unlikely(has_data == PVRDMA_INVALID_IDX))
                        dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
        }

        spin_unlock_irqrestore(&cq->cq_lock, flags);

        return has_data;
}

/**
 * pvrdma_create_cq - create completion queue
 * @ibcq: Allocated CQ
 * @attr: completion queue attributes
 * @udata: user data
 *
 * @return: 0 on success
 */
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                     struct ib_udata *udata)
{
        struct ib_device *ibdev = ibcq->device;
        int entries = attr->cqe;
        struct pvrdma_dev *dev = to_vdev(ibdev);
        struct pvrdma_cq *cq = to_vcq(ibcq);
        int ret;
        int npages;
        unsigned long flags;
        union pvrdma_cmd_req req;
        union pvrdma_cmd_resp rsp;
        struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
        struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
        struct pvrdma_create_cq_resp cq_resp = {};
        struct pvrdma_create_cq ucmd;
        struct pvrdma_ucontext *context = rdma_udata_to_drv_context(
                udata, struct pvrdma_ucontext, ibucontext);

        BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);

        entries = roundup_pow_of_two(entries);
        if (entries < 1 || entries > dev->dsr->caps.max_cqe)
                return -EINVAL;

        if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
                return -ENOMEM;

        cq->ibcq.cqe = entries;
        cq->is_kernel = !udata;

        if (!cq->is_kernel) {
                if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
                        ret = -EFAULT;
                        goto err_cq;
                }

                cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, ucmd.buf_size,
                                       IB_ACCESS_LOCAL_WRITE);
                if (IS_ERR(cq->umem)) {
                        ret = PTR_ERR(cq->umem);
                        goto err_cq;
                }

                npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE);
        } else {
                /* One extra page for shared ring state */
                npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
                              PAGE_SIZE - 1) / PAGE_SIZE;

                /* Skip header page. */
                cq->offset = PAGE_SIZE;
        }

        if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
                dev_warn(&dev->pdev->dev,
                         "overflow pages in completion queue\n");
                ret = -EINVAL;
                goto err_umem;
        }

        ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel);
        if (ret) {
                dev_warn(&dev->pdev->dev,
                         "could not allocate page directory\n");
                goto err_umem;
        }

        /* Ring state is always the first page. Set in library for user cq. */
        if (cq->is_kernel)
                cq->ring_state = cq->pdir.pages[0];
        else
                pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);

        refcount_set(&cq->refcnt, 1);
        init_completion(&cq->free);
        spin_lock_init(&cq->cq_lock);

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
        cmd->nchunks = npages;
        cmd->ctx_handle = context ? context->ctx_handle : 0;
        cmd->cqe = entries;
        cmd->pdir_dma = cq->pdir.dir_dma;
        ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
        if (ret < 0) {
                dev_warn(&dev->pdev->dev,
                         "could not create completion queue, error: %d\n", ret);
                goto err_page_dir;
        }

        cq->ibcq.cqe = resp->cqe;
        cq->cq_handle = resp->cq_handle;
        cq_resp.cqn = resp->cq_handle;
        spin_lock_irqsave(&dev->cq_tbl_lock, flags);
        dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
        spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

        if (!cq->is_kernel) {
                cq->uar = &context->uar;

                /* Copy udata back. */
                if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) {
                        dev_warn(&dev->pdev->dev,
                                 "failed to copy back udata\n");
                        pvrdma_destroy_cq(&cq->ibcq, udata);
                        return -EINVAL;
                }
        }

        return 0;

err_page_dir:
        pvrdma_page_dir_cleanup(dev, &cq->pdir);
err_umem:
        ib_umem_release(cq->umem);
err_cq:
        atomic_dec(&dev->num_cqs);
        return ret;
}

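/*
 * Drop the creation reference on the CQ, wait for any remaining users to
 * release theirs, then free the CQ's user memory (if any) and its page
 * directory.
 */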
static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
{
        if (refcount_dec_and_test(&cq->refcnt))
                complete(&cq->free);
        wait_for_completion(&cq->free);

        ib_umem_release(cq->umem);

        pvrdma_page_dir_cleanup(dev, &cq->pdir);
}

/**
 * pvrdma_destroy_cq - destroy completion queue
 * @cq: the completion queue to destroy.
 * @udata: user data or null for kernel object
 */
int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
        struct pvrdma_cq *vcq = to_vcq(cq);
        union pvrdma_cmd_req req;
        struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq;
        struct pvrdma_dev *dev = to_vdev(cq->device);
        unsigned long flags;
        int ret;

        memset(cmd, 0, sizeof(*cmd));
        cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ;
        cmd->cq_handle = vcq->cq_handle;

        ret = pvrdma_cmd_post(dev, &req, NULL, 0);
        if (ret < 0)
                dev_warn(&dev->pdev->dev,
                         "could not destroy completion queue, error: %d\n",
                         ret);

        /* free cq's resources */
        spin_lock_irqsave(&dev->cq_tbl_lock, flags);
        dev->cq_tbl[vcq->cq_handle] = NULL;
        spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

        pvrdma_free_cq(dev, vcq);
        atomic_dec(&dev->num_cqs);
        return 0;
}

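/* Return a pointer to the i-th CQE in the ring via the CQ's page directory. */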
static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
{
        return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
                                        &cq->pdir,
                                        cq->offset +
                                        sizeof(struct pvrdma_cqe) * i);
}

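/*
 * Discard all pending CQEs that belong to @qp from @cq's ring, compacting
 * the remaining entries so completions for other QPs are preserved.
 * The caller must hold cq->cq_lock; only kernel CQs are touched here.
 */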
void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq)
{
        unsigned int head;
        int has_data;

        if (!cq->is_kernel)
                return;

        /* Lock held */
        has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
                                            cq->ibcq.cqe, &head);
        if (unlikely(has_data > 0)) {
                int items;
                int curr;
                int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail,
                                      cq->ibcq.cqe);
                struct pvrdma_cqe *cqe;
                struct pvrdma_cqe *curr_cqe;

                items = (tail > head) ? (tail - head) :
                        (cq->ibcq.cqe - head + tail);
                curr = --tail;
                while (items-- > 0) {
                        if (curr < 0)
                                curr = cq->ibcq.cqe - 1;
                        if (tail < 0)
                                tail = cq->ibcq.cqe - 1;
                        curr_cqe = get_cqe(cq, curr);
                        if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) {
                                if (curr != tail) {
                                        cqe = get_cqe(cq, tail);
                                        *cqe = *curr_cqe;
                                }
                                tail--;
                        } else {
                                pvrdma_idx_ring_inc(
                                        &cq->ring_state->rx.cons_head,
                                        cq->ibcq.cqe);
                        }
                        curr--;
                }
        }
}

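/*
 * Poll a single completion from the CQ ring and translate it into @wc.
 * If the ring looks empty, ring the UAR poll doorbell once and re-check.
 * Returns 0 on success, or -EAGAIN if no completion is available or the
 * CQE's QP no longer exists.
 */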
static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp,
                           struct ib_wc *wc)
{
        struct pvrdma_dev *dev = to_vdev(cq->ibcq.device);
        int has_data;
        unsigned int head;
        bool tried = false;
        struct pvrdma_cqe *cqe;

retry:
        has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
                                            cq->ibcq.cqe, &head);
        if (has_data == 0) {
                if (tried)
                        return -EAGAIN;

                pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL);

                tried = true;
                goto retry;
        } else if (has_data == PVRDMA_INVALID_IDX) {
                dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
                return -EAGAIN;
        }

        cqe = get_cqe(cq, head);

        /* Ensure cqe is valid. */
        rmb();
        if (dev->qp_tbl[cqe->qp & 0xffff])
                *cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff];
        else
                return -EAGAIN;

        wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode);
        wc->status = pvrdma_wc_status_to_ib(cqe->status);
        wc->wr_id = cqe->wr_id;
        wc->qp = &(*cur_qp)->ibqp;
        wc->byte_len = cqe->byte_len;
        wc->ex.imm_data = cqe->imm_data;
        wc->src_qp = cqe->src_qp;
        wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags);
        wc->pkey_index = cqe->pkey_index;
        wc->slid = cqe->slid;
        wc->sl = cqe->sl;
        wc->dlid_path_bits = cqe->dlid_path_bits;
        wc->port_num = cqe->port_num;
        wc->vendor_err = cqe->vendor_err;
        wc->network_hdr_type = cqe->network_hdr_type;

        /* Update shared ring state */
        pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);

        return 0;
}

/**
 * pvrdma_poll_cq - poll for work completion queue entries
 * @ibcq: completion queue
 * @num_entries: the maximum number of entries
 * @wc: pointer to work completion array
 *
 * @return: number of polled completion entries
 */
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        struct pvrdma_cq *cq = to_vcq(ibcq);
        struct pvrdma_qp *cur_qp = NULL;
        unsigned long flags;
        int npolled;

        if (num_entries < 1 || wc == NULL)
                return 0;

        spin_lock_irqsave(&cq->cq_lock, flags);
        for (npolled = 0; npolled < num_entries; ++npolled) {
                if (pvrdma_poll_one(cq, &cur_qp, wc + npolled))
                        break;
        }

        spin_unlock_irqrestore(&cq->cq_lock, flags);

        /* Ensure we do not return errors from poll_cq */
        return npolled;
}