linux/drivers/infiniband/core/cq.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2015 HGST, a Western Digital Company.
 */
#include <linux/module.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

#include <trace/events/rdma_core.h>

/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH                   16
#define IB_POLL_BATCH_DIRECT            8

/* # of WCs to iterate over before yielding */
#define IB_POLL_BUDGET_IRQ              256
#define IB_POLL_BUDGET_WORKQUEUE        65536

#define IB_POLL_FLAGS \
        (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)

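/*
 * DIM (dynamic interrupt moderation) profile table.  Each entry is a
 * struct dim_cq_moder, i.e. {usec, pkts, comps, cq_period_mode}; the DIM
 * core walks dim->profile_ix up and down this table based on the measured
 * completion rate.
 */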
static const struct dim_cq_moder
rdma_dim_prof[RDMA_DIM_PARAMS_NUM_PROFILES] = {
        {1,   0, 1,  0},
        {1,   0, 4,  0},
        {2,   0, 4,  0},
        {2,   0, 8,  0},
        {4,   0, 8,  0},
        {16,  0, 8,  0},
        {16,  0, 16, 0},
        {32,  0, 16, 0},
        {32,  0, 32, 0},
};

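/*
 * Apply the moderation profile picked by the DIM algorithm: ask the device
 * to update the CQ's completion count/timeout, then restart the measurement
 * cycle.
 */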
static void ib_cq_rdma_dim_work(struct work_struct *w)
{
        struct dim *dim = container_of(w, struct dim, work);
        struct ib_cq *cq = dim->priv;

        u16 usec = rdma_dim_prof[dim->profile_ix].usec;
        u16 comps = rdma_dim_prof[dim->profile_ix].comps;

        dim->state = DIM_START_MEASURE;

        trace_cq_modify(cq, comps, usec);
        cq->device->ops.modify_cq(cq, comps, usec);
}

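/*
 * Enable dynamic interrupt moderation for this CQ.  Skipped when the device
 * cannot modify CQ moderation, has not opted in to CQ DIM, or when the CQ
 * is polled directly (nothing to moderate).  Allocation failure is not
 * fatal; the CQ simply runs without DIM.
 */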
static void rdma_dim_init(struct ib_cq *cq)
{
        struct dim *dim;

        if (!cq->device->ops.modify_cq || !cq->device->use_cq_dim ||
            cq->poll_ctx == IB_POLL_DIRECT)
                return;

        dim = kzalloc(sizeof(struct dim), GFP_KERNEL);
        if (!dim)
                return;

        dim->state = DIM_START_MEASURE;
        dim->tune_state = DIM_GOING_RIGHT;
        dim->profile_ix = RDMA_DIM_START_PROFILE;
        dim->priv = cq;
        cq->dim = dim;

        INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
}

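/* Thin wrapper around ib_poll_cq() that also emits the cq_poll tracepoint. */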
static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
        int rc;

        rc = ib_poll_cq(cq, num_entries, wc);
        trace_cq_poll(cq, num_entries, rc);
        return rc;
}

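/*
 * Poll the CQ in batches of up to @batch WCs and run each completion's
 * wr_cqe->done() callback, until @budget completions have been processed or
 * the CQ runs dry.  Returns the number of completions handled.
 */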
static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
                           int batch)
{
        int i, n, completed = 0;

        trace_cq_process(cq);

        /*
         * budget might be (-1) if the caller does not
         * want to bound this call, thus we need unsigned
         * minimum here.
         */
        while ((n = __poll_cq(cq, min_t(u32, batch,
                                        budget - completed), wcs)) > 0) {
                for (i = 0; i < n; i++) {
                        struct ib_wc *wc = &wcs[i];

                        if (wc->wr_cqe)
                                wc->wr_cqe->done(cq, wc);
                        else
                                WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
                }

                completed += n;

                if (n != batch || (budget != -1 && completed >= budget))
                        break;
        }

        return completed;
}

/**
 * ib_process_cq_direct - process a CQ in caller context
 * @cq:         CQ to process
 * @budget:     number of CQEs to poll for
 *
 * This function is used to process all outstanding CQ entries.
 * It does not offload CQ processing to a different context and does
 * not ask for completion interrupts from the HCA.
 * Using direct processing on a CQ whose type is not IB_POLL_DIRECT may
 * trigger concurrent processing.
 *
 * Note: do not pass -1 as %budget unless it is guaranteed that the number
 * of completions that will be processed is small.
 */
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
        struct ib_wc wcs[IB_POLL_BATCH_DIRECT];

        return __ib_process_cq(cq, budget, wcs, IB_POLL_BATCH_DIRECT);
}
EXPORT_SYMBOL(ib_process_cq_direct);
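
/*
 * Illustrative usage sketch (not part of this file): a ULP that allocated
 * its CQ with IB_POLL_DIRECT drains it from its own context, e.g.:
 *
 *      cq = ib_alloc_cq(dev, NULL, nr_cqe, 0, IB_POLL_DIRECT);
 *      ...
 *      completed = ib_process_cq_direct(cq, budget);
 *
 * where nr_cqe and budget are caller-chosen values.
 */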

static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
{
        WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq);
}

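/*
 * irq_poll (softirq) handler for IB_POLL_SOFTIRQ CQs.  Processes up to
 * @budget completions; once the CQ is drained it re-arms completion
 * notification and only reschedules itself if completions arrived in the
 * meantime.  Feeds the completion count to DIM when enabled.
 */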
static int ib_poll_handler(struct irq_poll *iop, int budget)
{
        struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
        struct dim *dim = cq->dim;
        int completed;

        completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
        if (completed < budget) {
                irq_poll_complete(&cq->iop);
                if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) {
                        trace_cq_reschedule(cq);
                        irq_poll_sched(&cq->iop);
                }
        }

        if (dim)
                rdma_dim(dim, completed);

        return completed;
}

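/*
 * Completion handler for IB_POLL_SOFTIRQ CQs: schedule irq_poll processing
 * in softirq context.
 */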
static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
{
        trace_cq_schedule(cq);
        irq_poll_sched(&cq->iop);
}

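/*
 * Workqueue handler for IB_POLL_WORKQUEUE and IB_POLL_UNBOUND_WORKQUEUE CQs.
 * Processes up to IB_POLL_BUDGET_WORKQUEUE completions and requeues itself
 * if the budget was exhausted or if re-arming the CQ reported missed
 * completions; otherwise the completion count is fed to DIM when enabled.
 */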
static void ib_cq_poll_work(struct work_struct *work)
{
        struct ib_cq *cq = container_of(work, struct ib_cq, work);
        int completed;

        completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, cq->wc,
                                    IB_POLL_BATCH);
        if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
            ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
                queue_work(cq->comp_wq, &cq->work);
        else if (cq->dim)
                rdma_dim(cq->dim, completed);
}

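/*
 * Completion handler for workqueue-polled CQs: kick the CQ's poll work on
 * the completion workqueue selected at allocation time.
 */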
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
        trace_cq_schedule(cq);
        queue_work(cq->comp_wq, &cq->work);
}

/**
 * __ib_alloc_cq_user - allocate a completion queue
 * @dev:                device to allocate the CQ for
 * @private:            driver private data, accessible from cq->cq_context
 * @nr_cqe:             number of CQEs to allocate
 * @comp_vector:        HCA completion vectors for this CQ
 * @poll_ctx:           context to poll the CQ from
 * @caller:             module owner name
 * @udata:              Valid user data or NULL for kernel object
 *
 * This is the proper interface to allocate a CQ for in-kernel users. A
 * CQ allocated with this interface will automatically be polled from the
 * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
 * to use this CQ abstraction.
 */
struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
                                 int nr_cqe, int comp_vector,
                                 enum ib_poll_context poll_ctx,
                                 const char *caller, struct ib_udata *udata)
{
        struct ib_cq_init_attr cq_attr = {
                .cqe            = nr_cqe,
                .comp_vector    = comp_vector,
        };
        struct ib_cq *cq;
        int ret = -ENOMEM;

        cq = rdma_zalloc_drv_obj(dev, ib_cq);
        if (!cq)
                return ERR_PTR(ret);

        cq->device = dev;
        cq->cq_context = private;
        cq->poll_ctx = poll_ctx;
        atomic_set(&cq->usecnt, 0);

        cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
        if (!cq->wc)
                goto out_free_cq;

        cq->res.type = RDMA_RESTRACK_CQ;
        rdma_restrack_set_task(&cq->res, caller);

        ret = dev->ops.create_cq(cq, &cq_attr, NULL);
        if (ret)
                goto out_free_wc;

        rdma_restrack_kadd(&cq->res);

        rdma_dim_init(cq);

        switch (cq->poll_ctx) {
        case IB_POLL_DIRECT:
                cq->comp_handler = ib_cq_completion_direct;
                break;
        case IB_POLL_SOFTIRQ:
                cq->comp_handler = ib_cq_completion_softirq;

                irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
                ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
                break;
        case IB_POLL_WORKQUEUE:
        case IB_POLL_UNBOUND_WORKQUEUE:
                cq->comp_handler = ib_cq_completion_workqueue;
                INIT_WORK(&cq->work, ib_cq_poll_work);
                ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
                cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
                                ib_comp_wq : ib_comp_unbound_wq;
                break;
        default:
                ret = -EINVAL;
                goto out_destroy_cq;
        }

        trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
        return cq;

out_destroy_cq:
        rdma_restrack_del(&cq->res);
        cq->device->ops.destroy_cq(cq, udata);
out_free_wc:
        kfree(cq->wc);
out_free_cq:
        kfree(cq);
        trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
        return ERR_PTR(ret);
}
EXPORT_SYMBOL(__ib_alloc_cq_user);
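
/*
 * Illustrative usage sketch (not part of this file): ULPs chain completions
 * through struct ib_cqe instead of wr_id, roughly like:
 *
 *      static void my_done(struct ib_cq *cq, struct ib_wc *wc)
 *      {
 *              struct my_request *req =
 *                      container_of(wc->wr_cqe, struct my_request, cqe);
 *              ...
 *      }
 *
 *      req->cqe.done = my_done;
 *      wr.wr_cqe = &req->cqe;
 *
 * my_done, my_request, req and wr are placeholder names.
 */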

/**
 * __ib_alloc_cq_any - allocate a completion queue
 * @dev:                device to allocate the CQ for
 * @private:            driver private data, accessible from cq->cq_context
 * @nr_cqe:             number of CQEs to allocate
 * @poll_ctx:           context to poll the CQ from
 * @caller:             module owner name
 *
 * Attempt to spread ULP Completion Queues over each device's interrupt
 * vectors. A simple best-effort mechanism is used.
 */
struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
                                int nr_cqe, enum ib_poll_context poll_ctx,
                                const char *caller)
{
        static atomic_t counter;
        int comp_vector = 0;

        if (dev->num_comp_vectors > 1)
                comp_vector =
                        atomic_inc_return(&counter) %
                        min_t(int, dev->num_comp_vectors, num_online_cpus());

        return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
                                  caller, NULL);
}
EXPORT_SYMBOL(__ib_alloc_cq_any);
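
/*
 * Note: in-kernel users normally reach the two allocators above through the
 * ib_alloc_cq()/ib_alloc_cq_any() wrapper macros in <rdma/ib_verbs.h>, which
 * supply KBUILD_MODNAME as @caller.
 */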

/**
 * ib_free_cq_user - free a completion queue
 * @cq:         completion queue to free.
 * @udata:      User data or NULL for kernel object
 */
void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
        if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
                return;

        switch (cq->poll_ctx) {
        case IB_POLL_DIRECT:
                break;
        case IB_POLL_SOFTIRQ:
                irq_poll_disable(&cq->iop);
                break;
        case IB_POLL_WORKQUEUE:
        case IB_POLL_UNBOUND_WORKQUEUE:
                cancel_work_sync(&cq->work);
                break;
        default:
                WARN_ON_ONCE(1);
        }

        trace_cq_free(cq);
        rdma_restrack_del(&cq->res);
        cq->device->ops.destroy_cq(cq, udata);
        if (cq->dim)
                cancel_work_sync(&cq->dim->work);
        kfree(cq->dim);
        kfree(cq->wc);
        kfree(cq);
}
EXPORT_SYMBOL(ib_free_cq_user);