linux/drivers/infiniband/sw/rdmavt/cq.c
/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"
#include "trace.h"

/**
 * rvt_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is solicited
 *
 * This may be called with qp->s_lock held.
 */
void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
{
        struct rvt_cq_wc *wc;
        unsigned long flags;
        u32 head;
        u32 next;

        spin_lock_irqsave(&cq->lock, flags);

        /*
         * Note that the head pointer might be writable by user processes.
         * Take care to verify it is a sane value.
         */
        wc = cq->queue;
        head = wc->head;
        if (head >= (unsigned)cq->ibcq.cqe) {
                head = cq->ibcq.cqe;
                next = 0;
        } else {
                next = head + 1;
        }

        if (unlikely(next == wc->tail)) {
                spin_unlock_irqrestore(&cq->lock, flags);
                if (cq->ibcq.event_handler) {
                        struct ib_event ev;

                        ev.device = cq->ibcq.device;
                        ev.element.cq = &cq->ibcq;
                        ev.event = IB_EVENT_CQ_ERR;
                        cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
                }
                return;
        }
        trace_rvt_cq_enter(cq, entry, head);
        if (cq->ip) {
                wc->uqueue[head].wr_id = entry->wr_id;
                wc->uqueue[head].status = entry->status;
                wc->uqueue[head].opcode = entry->opcode;
                wc->uqueue[head].vendor_err = entry->vendor_err;
                wc->uqueue[head].byte_len = entry->byte_len;
                wc->uqueue[head].ex.imm_data =
                        (__u32 __force)entry->ex.imm_data;
                wc->uqueue[head].qp_num = entry->qp->qp_num;
                wc->uqueue[head].src_qp = entry->src_qp;
                wc->uqueue[head].wc_flags = entry->wc_flags;
                wc->uqueue[head].pkey_index = entry->pkey_index;
                wc->uqueue[head].slid = ib_lid_cpu16(entry->slid);
                wc->uqueue[head].sl = entry->sl;
                wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
                wc->uqueue[head].port_num = entry->port_num;
                /* Make sure entry is written before the head index. */
                smp_wmb();
        } else {
                wc->kqueue[head] = *entry;
        }
        wc->head = next;

        if (cq->notify == IB_CQ_NEXT_COMP ||
            (cq->notify == IB_CQ_SOLICITED &&
             (solicited || entry->status != IB_WC_SUCCESS))) {
                /*
                 * This will cause send_complete() to be called in
                 * another thread.
                 */
                spin_lock(&cq->rdi->n_cqs_lock);
                if (likely(cq->rdi->worker)) {
                        cq->notify = RVT_CQ_NONE;
                        cq->triggered++;
                        kthread_queue_work(cq->rdi->worker, &cq->comptask);
                }
                spin_unlock(&cq->rdi->n_cqs_lock);
        }

        spin_unlock_irqrestore(&cq->lock, flags);
}
EXPORT_SYMBOL(rvt_cq_enter);

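/*
 * Illustrative only: a minimal sketch of how a driver's send-completion
 * path might report a finished work request through rvt_cq_enter().  This
 * helper and its parameter names are assumptions made for the example; they
 * are not part of rdmavt.
 */
static inline void example_report_send_done(struct ib_qp *qp, u64 wr_id,
                                            u32 byte_len, bool solicited)
{
        struct ib_wc wc;

        /* Describe the completed work request. */
        memset(&wc, 0, sizeof(wc));
        wc.wr_id = wr_id;
        wc.status = IB_WC_SUCCESS;
        wc.opcode = IB_WC_SEND;
        wc.byte_len = byte_len;
        wc.qp = qp;

        /* Queue the entry; rvt_cq_enter() wakes the consumer if requested. */
        rvt_cq_enter(ibcq_to_rvtcq(qp->send_cq), &wc, solicited);
}
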
static void send_complete(struct kthread_work *work)
{
        struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);

        /*
         * The completion handler will most likely rearm the notification
         * and poll for all pending entries.  If a new completion entry
         * is added while we are in this routine, kthread_queue_work()
         * won't call us again until we return so we check triggered to
         * see if we need to call the handler again.
         */
        for (;;) {
                u8 triggered = cq->triggered;

                /*
                 * IPoIB connected mode assumes the callback is from a
                 * soft IRQ. We simulate this by blocking "bottom halves".
                 * See the implementation for ipoib_cm_handle_tx_wc(),
                 * netif_tx_lock_bh() and netif_tx_lock().
                 */
                local_bh_disable();
                cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
                local_bh_enable();

                if (cq->triggered == triggered)
                        return;
        }
}

/**
 * rvt_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @attr: creation attributes
 * @context: unused by the driver
 * @udata: user data for libibverbs.so
 *
 * Called by ib_create_cq() in the generic verbs code.
 *
 * Return: pointer to the completion queue or negative errno values
 * for failure.
 */
struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
                            const struct ib_cq_init_attr *attr,
                            struct ib_ucontext *context,
                            struct ib_udata *udata)
{
        struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
        struct rvt_cq *cq;
        struct rvt_cq_wc *wc;
        struct ib_cq *ret;
        u32 sz;
        unsigned int entries = attr->cqe;

        if (attr->flags)
                return ERR_PTR(-EINVAL);

        if (entries < 1 || entries > rdi->dparms.props.max_cqe)
                return ERR_PTR(-EINVAL);

        /* Allocate the completion queue structure. */
        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        /*
         * Allocate the completion queue entries and head/tail pointers.
         * This is allocated separately so that it can be resized and
         * also mapped into user space.
         * We need to use vmalloc() in order to support mmap and large
         * numbers of entries.
         */
        sz = sizeof(*wc);
        if (udata && udata->outlen >= sizeof(__u64))
                sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
        else
                sz += sizeof(struct ib_wc) * (entries + 1);
        wc = vmalloc_user(sz);
        if (!wc) {
                ret = ERR_PTR(-ENOMEM);
                goto bail_cq;
        }

        /*
         * Return the address of the WC as the offset to mmap.
         * See rvt_mmap() for details.
         */
        if (udata && udata->outlen >= sizeof(__u64)) {
                int err;

                cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
                if (!cq->ip) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_wc;
                }

                err = ib_copy_to_udata(udata, &cq->ip->offset,
                                       sizeof(cq->ip->offset));
                if (err) {
                        ret = ERR_PTR(err);
                        goto bail_ip;
                }
        }

        spin_lock_irq(&rdi->n_cqs_lock);
        if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
                spin_unlock_irq(&rdi->n_cqs_lock);
                ret = ERR_PTR(-ENOMEM);
                goto bail_ip;
        }

        rdi->n_cqs_allocated++;
        spin_unlock_irq(&rdi->n_cqs_lock);

        if (cq->ip) {
                spin_lock_irq(&rdi->pending_lock);
                list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
                spin_unlock_irq(&rdi->pending_lock);
        }

        /*
         * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
         * The number of entries should be >= the number requested or return
         * an error.
         */
        cq->rdi = rdi;
        cq->ibcq.cqe = entries;
        cq->notify = RVT_CQ_NONE;
        spin_lock_init(&cq->lock);
        kthread_init_work(&cq->comptask, send_complete);
        cq->queue = wc;

        ret = &cq->ibcq;

        goto done;

bail_ip:
        kfree(cq->ip);
bail_wc:
        vfree(wc);
bail_cq:
        kfree(cq);
done:
        return ret;
}

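/*
 * Illustrative only: roughly how a kernel consumer reaches rvt_create_cq()
 * through the verbs layer.  The helper name, the comp handler argument and
 * the 256-entry size are assumptions for this sketch, not rdmavt defaults.
 */
static inline struct ib_cq *example_alloc_cq(struct ib_device *ibdev,
                                             ib_comp_handler handler,
                                             void *context)
{
        struct ib_cq_init_attr attr = {
                .cqe = 256,     /* must be >= 1 and <= props.max_cqe */
                .comp_vector = 0,
                .flags = 0,     /* rvt_create_cq() rejects any flags */
        };

        /* The verbs core fills in the common ibcq fields around this call. */
        return ib_create_cq(ibdev, handler, NULL, context, &attr);
}
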
/**
 * rvt_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 *
 * Return: always 0
 */
int rvt_destroy_cq(struct ib_cq *ibcq)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_dev_info *rdi = cq->rdi;

        kthread_flush_work(&cq->comptask);
        spin_lock_irq(&rdi->n_cqs_lock);
        rdi->n_cqs_allocated--;
        spin_unlock_irq(&rdi->n_cqs_lock);
        if (cq->ip)
                kref_put(&cq->ip->ref, rvt_release_mmap_info);
        else
                vfree(cq->queue);
        kfree(cq);

        return 0;
}

/**
 * rvt_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 *
 * Return: 0 for success.
 */
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cq->lock, flags);
        /*
         * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
         * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
         */
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

        if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
            cq->queue->head != cq->queue->tail)
                ret = 1;

        spin_unlock_irqrestore(&cq->lock, flags);

        return ret;
}

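/*
 * Illustrative only: the consumer-side rearm pattern that the
 * IB_CQ_REPORT_MISSED_EVENTS return value above is meant for.  The helper
 * name is an assumption for this sketch, and it assumes a kernel CQ.
 */
static inline void example_rearm_cq(struct ib_cq *ibcq)
{
        struct ib_wc wc;

        /*
         * Request the next completion event and, in the same call, learn
         * whether entries are already queued (rvt_req_notify_cq() returns
         * 1).  If so, poll again rather than wait for an event that will
         * not be generated for the entries already in the queue.
         */
        while (ib_req_notify_cq(ibcq, IB_CQ_NEXT_COMP |
                                IB_CQ_REPORT_MISSED_EVENTS) > 0) {
                while (ib_poll_cq(ibcq, 1, &wc) > 0)
                        ;       /* hand wc back to the consumer here */
        }
}
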
/**
 * rvt_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new number of completion entries to support
 * @udata: user data for libibverbs.so
 *
 * Return: 0 for success.
 */
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_cq_wc *old_wc;
        struct rvt_cq_wc *wc;
        u32 head, tail, n;
        int ret;
        u32 sz;
        struct rvt_dev_info *rdi = cq->rdi;

        if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
                return -EINVAL;

        /*
         * Need to use vmalloc() if we want to support large #s of entries.
         */
        sz = sizeof(*wc);
        if (udata && udata->outlen >= sizeof(__u64))
                sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
        else
                sz += sizeof(struct ib_wc) * (cqe + 1);
        wc = vmalloc_user(sz);
        if (!wc)
                return -ENOMEM;

        /* Check that we can write the offset to mmap. */
        if (udata && udata->outlen >= sizeof(__u64)) {
                __u64 offset = 0;

                ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
                if (ret)
                        goto bail_free;
        }

        spin_lock_irq(&cq->lock);
        /*
         * Make sure head and tail are sane since they
         * might be user writable.
         */
        old_wc = cq->queue;
        head = old_wc->head;
        if (head > (u32)cq->ibcq.cqe)
                head = (u32)cq->ibcq.cqe;
        tail = old_wc->tail;
        if (tail > (u32)cq->ibcq.cqe)
                tail = (u32)cq->ibcq.cqe;
        if (head < tail)
                n = cq->ibcq.cqe + 1 + head - tail;
        else
                n = head - tail;
        if (unlikely((u32)cqe < n)) {
                ret = -EINVAL;
                goto bail_unlock;
        }
        for (n = 0; tail != head; n++) {
                if (cq->ip)
                        wc->uqueue[n] = old_wc->uqueue[tail];
                else
                        wc->kqueue[n] = old_wc->kqueue[tail];
                if (tail == (u32)cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        cq->ibcq.cqe = cqe;
        wc->head = n;
        wc->tail = 0;
        cq->queue = wc;
        spin_unlock_irq(&cq->lock);

        vfree(old_wc);

        if (cq->ip) {
                struct rvt_mmap_info *ip = cq->ip;

                rvt_update_mmap_info(rdi, ip, sz, wc);

                /*
                 * Return the offset to mmap.
                 * See rvt_mmap() for details.
                 */
                if (udata && udata->outlen >= sizeof(__u64)) {
                        ret = ib_copy_to_udata(udata, &ip->offset,
                                               sizeof(ip->offset));
                        if (ret)
                                return ret;
                }

                spin_lock_irq(&rdi->pending_lock);
                if (list_empty(&ip->pending_mmaps))
                        list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
                spin_unlock_irq(&rdi->pending_lock);
        }

        return 0;

bail_unlock:
        spin_unlock_irq(&cq->lock);
bail_free:
        vfree(wc);
        return ret;
}

/**
 * rvt_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 *
 * Return: the number of completion entries polled.
 */
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_cq_wc *wc;
        unsigned long flags;
        int npolled;
        u32 tail;

        /* The kernel can only poll a kernel completion queue */
        if (cq->ip)
                return -EINVAL;

        spin_lock_irqsave(&cq->lock, flags);

        wc = cq->queue;
        tail = wc->tail;
        if (tail > (u32)cq->ibcq.cqe)
                tail = (u32)cq->ibcq.cqe;
        for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
                if (tail == wc->head)
                        break;
                /* The kernel doesn't need a RMB since it has the lock. */
                trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
                *entry = wc->kqueue[tail];
                if (tail >= cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        wc->tail = tail;

        spin_unlock_irqrestore(&cq->lock, flags);

        return npolled;
}

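/*
 * Illustrative only: a completion handler of the kind send_complete()
 * above ends up invoking, draining the CQ in small batches through
 * ib_poll_cq()/rvt_poll_cq().  The helper name and batch size are
 * assumptions for this sketch.
 */
static inline void example_comp_handler(struct ib_cq *ibcq, void *cq_context)
{
        struct ib_wc wc[8];
        int n, i;

        /* rvt_poll_cq() copies up to ARRAY_SIZE(wc) kernel CQEs per call. */
        while ((n = ib_poll_cq(ibcq, ARRAY_SIZE(wc), wc)) > 0) {
                for (i = 0; i < n; i++) {
                        if (wc[i].status != IB_WC_SUCCESS)
                                pr_debug("wr_id %llu failed with status %d\n",
                                         wc[i].wr_id, wc[i].status);
                        /* otherwise hand wc[i] back to the consumer */
                }
        }
}
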
/**
 * rvt_driver_cq_init - Init cq resources on behalf of driver
 * @rdi: rvt dev structure
 *
 * Return: 0 on success
 */
int rvt_driver_cq_init(struct rvt_dev_info *rdi)
{
        int cpu;
        struct kthread_worker *worker;

        if (rdi->worker)
                return 0;

        spin_lock_init(&rdi->n_cqs_lock);

        cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
        worker = kthread_create_worker_on_cpu(cpu, 0,
                                              "%s", rdi->dparms.cq_name);
        if (IS_ERR(worker))
                return PTR_ERR(worker);

        set_user_nice(worker->task, MIN_NICE);
        rdi->worker = worker;
        return 0;
}

/**
 * rvt_cq_exit - tear down cq resources
 * @rdi: rvt dev structure
 */
void rvt_cq_exit(struct rvt_dev_info *rdi)
{
        struct kthread_worker *worker;

        /* block future queuing from send_complete() */
        spin_lock_irq(&rdi->n_cqs_lock);
        worker = rdi->worker;
        if (!worker) {
                spin_unlock_irq(&rdi->n_cqs_lock);
                return;
        }
        rdi->worker = NULL;
        spin_unlock_irq(&rdi->n_cqs_lock);

        kthread_destroy_worker(worker);
}