linux/drivers/infiniband/hw/qib/qib_cq.c
/*
 * Copyright (c) 2013 Intel Corporation.  All rights reserved.
 * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>

#include "qib_verbs.h"
#include "qib.h"

/**
 * qib_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is a solicited entry
 *
 * This may be called with qp->s_lock held.
 */
void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
{
        struct qib_cq_wc *wc;
        unsigned long flags;
        u32 head;
        u32 next;

        spin_lock_irqsave(&cq->lock, flags);

        /*
         * Note that the head pointer might be writable by user processes.
         * Take care to verify it is a sane value.
         */
        wc = cq->queue;
        head = wc->head;
        if (head >= (unsigned) cq->ibcq.cqe) {
                head = cq->ibcq.cqe;
                next = 0;
        } else
                next = head + 1;
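        /*
         * The ring has cq->ibcq.cqe + 1 slots, indexed 0..cq->ibcq.cqe,
         * so a full queue (next == tail) can be told apart from an empty
         * one (head == tail).  For example, with ibcq.cqe == 3 the ring
         * has four slots and holds at most three completions.
         */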
        if (unlikely(next == wc->tail)) {
                spin_unlock_irqrestore(&cq->lock, flags);
                if (cq->ibcq.event_handler) {
                        struct ib_event ev;

                        ev.device = cq->ibcq.device;
                        ev.element.cq = &cq->ibcq;
                        ev.event = IB_EVENT_CQ_ERR;
                        cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
                }
                return;
        }
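        /*
         * For a CQ that is mmap()ed into user space, copy each field into
         * the fixed-layout struct ib_uverbs_wc entry rather than copying
         * the kernel struct ib_wc, which contains kernel pointers (e.g.
         * the qp) that user space must not see.
         */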
        if (cq->ip) {
                wc->uqueue[head].wr_id = entry->wr_id;
                wc->uqueue[head].status = entry->status;
                wc->uqueue[head].opcode = entry->opcode;
                wc->uqueue[head].vendor_err = entry->vendor_err;
                wc->uqueue[head].byte_len = entry->byte_len;
                wc->uqueue[head].ex.imm_data =
                        (__u32 __force)entry->ex.imm_data;
                wc->uqueue[head].qp_num = entry->qp->qp_num;
                wc->uqueue[head].src_qp = entry->src_qp;
                wc->uqueue[head].wc_flags = entry->wc_flags;
                wc->uqueue[head].pkey_index = entry->pkey_index;
                wc->uqueue[head].slid = entry->slid;
                wc->uqueue[head].sl = entry->sl;
                wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
                wc->uqueue[head].port_num = entry->port_num;
                /* Make sure entry is written before the head index. */
                smp_wmb();
        } else
                wc->kqueue[head] = *entry;
        wc->head = next;

        if (cq->notify == IB_CQ_NEXT_COMP ||
            (cq->notify == IB_CQ_SOLICITED &&
             (solicited || entry->status != IB_WC_SUCCESS))) {
                struct kthread_worker *worker;
                /*
                 * This will cause send_complete() to be called in
                 * another thread.
                 */
                smp_rmb();
                worker = cq->dd->worker;
                if (likely(worker)) {
                        cq->notify = IB_CQ_NONE;
                        cq->triggered++;
                        queue_kthread_work(worker, &cq->comptask);
                }
        }

        spin_unlock_irqrestore(&cq->lock, flags);
}

/**
 * qib_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * Returns the number of completion entries polled.
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 */
int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
        struct qib_cq *cq = to_icq(ibcq);
        struct qib_cq_wc *wc;
        unsigned long flags;
        int npolled;
        u32 tail;

        /* The kernel can only poll a kernel completion queue */
        if (cq->ip) {
                npolled = -EINVAL;
                goto bail;
        }

        spin_lock_irqsave(&cq->lock, flags);

        wc = cq->queue;
        tail = wc->tail;
        if (tail > (u32) cq->ibcq.cqe)
                tail = (u32) cq->ibcq.cqe;
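        /*
         * Copy completions to the caller's array until the ring is empty
         * or num_entries have been returned; tail wraps to 0 after slot
         * cq->ibcq.cqe.
         */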
        for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
                if (tail == wc->head)
                        break;
                /* The kernel doesn't need a RMB since it has the lock. */
                *entry = wc->kqueue[tail];
                if (tail >= cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        wc->tail = tail;

        spin_unlock_irqrestore(&cq->lock, flags);

bail:
        return npolled;
}

static void send_complete(struct kthread_work *work)
{
        struct qib_cq *cq = container_of(work, struct qib_cq, comptask);

        /*
         * The completion handler will most likely rearm the notification
         * and poll for all pending entries.  If a new completion entry
         * is added while we are in this routine, queue_kthread_work()
         * won't call us again until we return, so we check triggered to
         * see if we need to call the handler again.
         */
        for (;;) {
                u8 triggered = cq->triggered;

                /*
                 * IPoIB connected mode assumes the callback is from a
                 * soft IRQ. We simulate this by blocking "bottom halves".
                 * See the implementation for ipoib_cm_handle_tx_wc(),
                 * netif_tx_lock_bh() and netif_tx_lock().
                 */
                local_bh_disable();
                cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
                local_bh_enable();

                if (cq->triggered == triggered)
                        return;
        }
}

/**
 * qib_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @entries: the minimum size of the completion queue
 * @comp_vector: unused by the QLogic_IB driver
 * @context: unused by the QLogic_IB driver
 * @udata: user data for libibverbs.so
 *
 * Returns a pointer to the completion queue or negative errno values
 * for failure.
 *
 * Called by ib_create_cq() in the generic verbs code.
 */
struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
                            int comp_vector, struct ib_ucontext *context,
                            struct ib_udata *udata)
{
        struct qib_ibdev *dev = to_idev(ibdev);
        struct qib_cq *cq;
        struct qib_cq_wc *wc;
        struct ib_cq *ret;
        u32 sz;

        if (entries < 1 || entries > ib_qib_max_cqes) {
                ret = ERR_PTR(-EINVAL);
                goto done;
        }

        /* Allocate the completion queue structure. */
        cq = kmalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq) {
                ret = ERR_PTR(-ENOMEM);
                goto done;
        }

        /*
         * Allocate the completion queue entries and head/tail pointers.
         * This is allocated separately so that it can be resized and
         * also mapped into user space.
         * We need to use vmalloc() in order to support mmap and large
         * numbers of entries.
         */
        sz = sizeof(*wc);
        if (udata && udata->outlen >= sizeof(__u64))
                sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
        else
                sz += sizeof(struct ib_wc) * (entries + 1);
        wc = vmalloc_user(sz);
        if (!wc) {
                ret = ERR_PTR(-ENOMEM);
                goto bail_cq;
        }

        /*
         * Return the address of the WC as the offset to mmap.
         * See qib_mmap() for details.
         */
        if (udata && udata->outlen >= sizeof(__u64)) {
                int err;

                cq->ip = qib_create_mmap_info(dev, sz, context, wc);
                if (!cq->ip) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_wc;
                }

                err = ib_copy_to_udata(udata, &cq->ip->offset,
                                       sizeof(cq->ip->offset));
                if (err) {
                        ret = ERR_PTR(err);
                        goto bail_ip;
                }
        } else
                cq->ip = NULL;

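        /*
         * Count this CQ against the device's limit on completion queues
         * (ib_qib_max_cqs) and fail if that limit has already been reached.
         */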
        spin_lock(&dev->n_cqs_lock);
        if (dev->n_cqs_allocated == ib_qib_max_cqs) {
                spin_unlock(&dev->n_cqs_lock);
                ret = ERR_PTR(-ENOMEM);
                goto bail_ip;
        }

        dev->n_cqs_allocated++;
        spin_unlock(&dev->n_cqs_lock);

        if (cq->ip) {
                spin_lock_irq(&dev->pending_lock);
                list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
                spin_unlock_irq(&dev->pending_lock);
        }

        /*
         * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
         * The number of entries should be >= the number requested or return
         * an error.
         */
        cq->dd = dd_from_dev(dev);
        cq->ibcq.cqe = entries;
        cq->notify = IB_CQ_NONE;
        cq->triggered = 0;
        spin_lock_init(&cq->lock);
        init_kthread_work(&cq->comptask, send_complete);
        wc->head = 0;
        wc->tail = 0;
        cq->queue = wc;

        ret = &cq->ibcq;

        goto done;

bail_ip:
        kfree(cq->ip);
bail_wc:
        vfree(wc);
bail_cq:
        kfree(cq);
done:
        return ret;
}

/**
 * qib_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Returns 0 for success.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 */
int qib_destroy_cq(struct ib_cq *ibcq)
{
        struct qib_ibdev *dev = to_idev(ibcq->device);
        struct qib_cq *cq = to_icq(ibcq);

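        /*
         * Make sure a completion callback scheduled on the CQ kthread is
         * not still queued or running before the CQ is freed.
         */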
        flush_kthread_work(&cq->comptask);
        spin_lock(&dev->n_cqs_lock);
        dev->n_cqs_allocated--;
        spin_unlock(&dev->n_cqs_lock);
        if (cq->ip)
                kref_put(&cq->ip->ref, qib_release_mmap_info);
        else
                vfree(cq->queue);
        kfree(cq);

        return 0;
}

/**
 * qib_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * Returns 0 for success.
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 */
int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
        struct qib_cq *cq = to_icq(ibcq);
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cq->lock, flags);
        /*
         * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
         * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
         */
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

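        /*
         * If the caller asked to be told about missed events, return 1
         * when completions are already sitting in the queue and would
         * otherwise be missed.
         */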
        if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
            cq->queue->head != cq->queue->tail)
                ret = 1;

        spin_unlock_irqrestore(&cq->lock, flags);

        return ret;
}

/**
 * qib_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new size of the completion queue
 * @udata: user data for libibverbs.so
 *
 * Returns 0 for success.
 */
int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        struct qib_cq *cq = to_icq(ibcq);
        struct qib_cq_wc *old_wc;
        struct qib_cq_wc *wc;
        u32 head, tail, n;
        int ret;
        u32 sz;

        if (cqe < 1 || cqe > ib_qib_max_cqes) {
                ret = -EINVAL;
                goto bail;
        }

        /*
         * Need to use vmalloc() if we want to support large #s of entries.
         */
        sz = sizeof(*wc);
        if (udata && udata->outlen >= sizeof(__u64))
                sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
        else
                sz += sizeof(struct ib_wc) * (cqe + 1);
        wc = vmalloc_user(sz);
        if (!wc) {
                ret = -ENOMEM;
                goto bail;
        }

        /* Check that we can write the offset to mmap. */
        if (udata && udata->outlen >= sizeof(__u64)) {
                __u64 offset = 0;

                ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
                if (ret)
                        goto bail_free;
        }

        spin_lock_irq(&cq->lock);
        /*
         * Make sure head and tail are sane since they
         * might be user writable.
         */
        old_wc = cq->queue;
        head = old_wc->head;
        if (head > (u32) cq->ibcq.cqe)
                head = (u32) cq->ibcq.cqe;
        tail = old_wc->tail;
        if (tail > (u32) cq->ibcq.cqe)
                tail = (u32) cq->ibcq.cqe;
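        /*
         * n is the number of completions currently in the old ring
         * (head - tail, modulo the ring size of cq->ibcq.cqe + 1); the
         * new size must be able to hold all of them.
         */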
        if (head < tail)
                n = cq->ibcq.cqe + 1 + head - tail;
        else
                n = head - tail;
        if (unlikely((u32)cqe < n)) {
                ret = -EINVAL;
                goto bail_unlock;
        }
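        /*
         * Copy the existing completions to the start of the new ring,
         * reusing n as the index into the new queue.
         */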
        for (n = 0; tail != head; n++) {
                if (cq->ip)
                        wc->uqueue[n] = old_wc->uqueue[tail];
                else
                        wc->kqueue[n] = old_wc->kqueue[tail];
                if (tail == (u32) cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        cq->ibcq.cqe = cqe;
        wc->head = n;
        wc->tail = 0;
        cq->queue = wc;
        spin_unlock_irq(&cq->lock);

        vfree(old_wc);

        if (cq->ip) {
                struct qib_ibdev *dev = to_idev(ibcq->device);
                struct qib_mmap_info *ip = cq->ip;

                qib_update_mmap_info(dev, ip, sz, wc);

                /*
                 * Return the offset to mmap.
                 * See qib_mmap() for details.
                 */
                if (udata && udata->outlen >= sizeof(__u64)) {
                        ret = ib_copy_to_udata(udata, &ip->offset,
                                               sizeof(ip->offset));
                        if (ret)
                                goto bail;
                }

                spin_lock_irq(&dev->pending_lock);
                if (list_empty(&ip->pending_mmaps))
                        list_add(&ip->pending_mmaps, &dev->pending_mmaps);
                spin_unlock_irq(&dev->pending_lock);
        }

        ret = 0;
        goto bail;

bail_unlock:
        spin_unlock_irq(&cq->lock);
bail_free:
        vfree(wc);
bail:
        return ret;
}

int qib_cq_init(struct qib_devdata *dd)
{
        int ret = 0;
        int cpu;
        struct task_struct *task;

        if (dd->worker)
                return 0;
        dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
        if (!dd->worker)
                return -ENOMEM;
        init_kthread_worker(dd->worker);
        task = kthread_create_on_node(
                kthread_worker_fn,
                dd->worker,
                dd->assigned_node_id,
                "qib_cq%d", dd->unit);
        if (IS_ERR(task))
                goto task_fail;
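        /*
         * Run the CQ completion worker on a CPU of the device's assigned
         * NUMA node so completion handling stays close to the HCA.
         */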
        cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
        kthread_bind(task, cpu);
        wake_up_process(task);
out:
        return ret;
task_fail:
        ret = PTR_ERR(task);
        kfree(dd->worker);
        dd->worker = NULL;
        goto out;
}

void qib_cq_exit(struct qib_devdata *dd)
{
        struct kthread_worker *worker;

        worker = dd->worker;
        if (!worker)
                return;
        /* blocks qib_cq_enter() from queuing future send_complete() work */
        dd->worker = NULL;
        smp_wmb();
        flush_kthread_worker(worker);
        kthread_stop(worker->task);
        kfree(worker);
}