linux/drivers/infiniband/sw/rdmavt/qp.c
   1/*
   2 * Copyright(c) 2016, 2017 Intel Corporation.
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of version 2 of the GNU General Public License as
  11 * published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but
  14 * WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * General Public License for more details.
  17 *
  18 * BSD LICENSE
  19 *
  20 * Redistribution and use in source and binary forms, with or without
  21 * modification, are permitted provided that the following conditions
  22 * are met:
  23 *
  24 *  - Redistributions of source code must retain the above copyright
  25 *    notice, this list of conditions and the following disclaimer.
  26 *  - Redistributions in binary form must reproduce the above copyright
  27 *    notice, this list of conditions and the following disclaimer in
  28 *    the documentation and/or other materials provided with the
  29 *    distribution.
  30 *  - Neither the name of Intel Corporation nor the names of its
  31 *    contributors may be used to endorse or promote products derived
  32 *    from this software without specific prior written permission.
  33 *
  34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45 *
  46 */
  47
  48#include <linux/hash.h>
  49#include <linux/bitops.h>
  50#include <linux/lockdep.h>
  51#include <linux/vmalloc.h>
  52#include <linux/slab.h>
  53#include <rdma/ib_verbs.h>
  54#include <rdma/ib_hdrs.h>
  55#include <rdma/opa_addr.h>
  56#include "qp.h"
  57#include "vt.h"
  58#include "trace.h"
  59
  60static void rvt_rc_timeout(struct timer_list *t);
  61
  62/*
  63 * Convert the AETH RNR timeout code into the number of microseconds.
  64 */
  65static const u32 ib_rvt_rnr_table[32] = {
  66        655360, /* 00: 655.36 */
  67        10,     /* 01:    .01 */
  68        20,     /* 02:    .02 */
  69        30,     /* 03:    .03 */
  70        40,     /* 04:    .04 */
  71        60,     /* 05:    .06 */
  72        80,     /* 06:    .08 */
  73        120,    /* 07:    .12 */
  74        160,    /* 08:    .16 */
  75        240,    /* 09:    .24 */
  76        320,    /* 0A:    .32 */
  77        480,    /* 0B:    .48 */
  78        640,    /* 0C:    .64 */
  79        960,    /* 0D:    .96 */
  80        1280,   /* 0E:   1.28 */
  81        1920,   /* 0F:   1.92 */
  82        2560,   /* 10:   2.56 */
  83        3840,   /* 11:   3.84 */
  84        5120,   /* 12:   5.12 */
  85        7680,   /* 13:   7.68 */
  86        10240,  /* 14:  10.24 */
  87        15360,  /* 15:  15.36 */
  88        20480,  /* 16:  20.48 */
  89        30720,  /* 17:  30.72 */
  90        40960,  /* 18:  40.96 */
  91        61440,  /* 19:  61.44 */
  92        81920,  /* 1A:  81.92 */
  93        122880, /* 1B: 122.88 */
  94        163840, /* 1C: 163.84 */
  95        245760, /* 1D: 245.76 */
  96        327680, /* 1E: 327.68 */
  97        491520  /* 1F: 491.52 */
  98};
  99
 100/*
 101 * Note that it is OK to post send work requests in the SQE and ERR
 102 * states; rvt_do_send() will process them and generate error
 103 * completions as per IB 1.2 C10-96.
 104 */
 105const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
 106        [IB_QPS_RESET] = 0,
 107        [IB_QPS_INIT] = RVT_POST_RECV_OK,
 108        [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
 109        [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
 110            RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
 111            RVT_PROCESS_NEXT_SEND_OK,
 112        [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
 113            RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
 114        [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
 115            RVT_POST_SEND_OK | RVT_FLUSH_SEND,
 116        [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
 117            RVT_POST_SEND_OK | RVT_FLUSH_SEND,
 118};
 119EXPORT_SYMBOL(ib_rvt_state_ops);
 120
 121static void get_map_page(struct rvt_qpn_table *qpt,
 122                         struct rvt_qpn_map *map)
 123{
 124        unsigned long page = get_zeroed_page(GFP_KERNEL);
 125
 126        /*
 127         * Free the page if someone raced with us installing it.
 128         */
 129
 130        spin_lock(&qpt->lock);
 131        if (map->page)
 132                free_page(page);
 133        else
 134                map->page = (void *)page;
 135        spin_unlock(&qpt->lock);
 136}
 137
 138/**
 139 * init_qpn_table - initialize the QP number table for a device
 140 * @qpt: the QPN table
 141 */
 142static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
 143{
 144        u32 offset, i;
 145        struct rvt_qpn_map *map;
 146        int ret = 0;
 147
 148        if (rdi->dparms.qpn_res_end < rdi->dparms.qpn_res_start)
 149                return -EINVAL;
 150
 151        spin_lock_init(&qpt->lock);
 152
 153        qpt->last = rdi->dparms.qpn_start;
 154        qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift;
 155
 156        /*
 157         * Drivers may want some QPs beyond what verbs needs; let them use
 158         * our qpn table rather than keeping a separate one. Mark the bitmaps
 159         * for those here. The reserved range must be *after* the range which
 160         * verbs will pick from.
 161         */
 162
 163        /* Figure out number of bit maps needed before reserved range */
 164        qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE;
 165
 166        /* This should always be zero */
 167        offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK;
 168
 169        /* Starting with the first reserved bit map */
 170        map = &qpt->map[qpt->nmaps];
 171
 172        rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n",
 173                    rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
 174        for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
 175                if (!map->page) {
 176                        get_map_page(qpt, map);
 177                        if (!map->page) {
 178                                ret = -ENOMEM;
 179                                break;
 180                        }
 181                }
 182                set_bit(offset, map->page);
 183                offset++;
 184                if (offset == RVT_BITS_PER_PAGE) {
 185                        /* next page */
 186                        qpt->nmaps++;
 187                        map++;
 188                        offset = 0;
 189                }
 190        }
 191        return ret;
 192}
 193
 194/**
 195 * free_qpn_table - free the QP number table for a device
 196 * @qpt: the QPN table
 197 */
 198static void free_qpn_table(struct rvt_qpn_table *qpt)
 199{
 200        int i;
 201
 202        for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
 203                free_page((unsigned long)qpt->map[i].page);
 204}
 205
 206/**
 207 * rvt_driver_qp_init - Init driver qp resources
 208 * @rdi: rvt dev structure
 209 *
 210 * Return: 0 on success
 211 */
 212int rvt_driver_qp_init(struct rvt_dev_info *rdi)
 213{
 214        int i;
 215        int ret = -ENOMEM;
 216
 217        if (!rdi->dparms.qp_table_size)
 218                return -EINVAL;
 219
 220        /*
 221         * If driver is not doing any QP allocation then make sure it is
 222         * providing the necessary QP functions.
 223         */
 224        if (!rdi->driver_f.free_all_qps ||
 225            !rdi->driver_f.qp_priv_alloc ||
 226            !rdi->driver_f.qp_priv_free ||
 227            !rdi->driver_f.notify_qp_reset ||
 228            !rdi->driver_f.notify_restart_rc)
 229                return -EINVAL;
 230
 231        /* allocate parent object */
 232        rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL,
 233                                   rdi->dparms.node);
 234        if (!rdi->qp_dev)
 235                return -ENOMEM;
 236
 237        /* allocate hash table */
 238        rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
 239        rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
 240        rdi->qp_dev->qp_table =
 241                kmalloc_array_node(rdi->qp_dev->qp_table_size,
 242                             sizeof(*rdi->qp_dev->qp_table),
 243                             GFP_KERNEL, rdi->dparms.node);
 244        if (!rdi->qp_dev->qp_table)
 245                goto no_qp_table;
 246
 247        for (i = 0; i < rdi->qp_dev->qp_table_size; i++)
 248                RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL);
 249
 250        spin_lock_init(&rdi->qp_dev->qpt_lock);
 251
 252        /* initialize qpn map */
 253        if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
 254                goto fail_table;
 255
 256        spin_lock_init(&rdi->n_qps_lock);
 257
 258        return 0;
 259
 260fail_table:
 261        kfree(rdi->qp_dev->qp_table);
 262        free_qpn_table(&rdi->qp_dev->qpn_table);
 263
 264no_qp_table:
 265        kfree(rdi->qp_dev);
 266
 267        return ret;
 268}
 269
 270/**
 271 * rvt_free_all_qps - check for QPs still in use
 272 * @rdi: rvt device info structure
 273 *
 274 * There should not be any QPs still in use.
 275 * Free memory for table.
 276 */
 277static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
 278{
 279        unsigned long flags;
 280        struct rvt_qp *qp;
 281        unsigned n, qp_inuse = 0;
 282        spinlock_t *ql; /* work around too long line below */
 283
 284        if (rdi->driver_f.free_all_qps)
 285                qp_inuse = rdi->driver_f.free_all_qps(rdi);
 286
 287        qp_inuse += rvt_mcast_tree_empty(rdi);
 288
 289        if (!rdi->qp_dev)
 290                return qp_inuse;
 291
 292        ql = &rdi->qp_dev->qpt_lock;
 293        spin_lock_irqsave(ql, flags);
 294        for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
 295                qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
 296                                               lockdep_is_held(ql));
 297                RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
 298
 299                for (; qp; qp = rcu_dereference_protected(qp->next,
 300                                                          lockdep_is_held(ql)))
 301                        qp_inuse++;
 302        }
 303        spin_unlock_irqrestore(ql, flags);
 304        synchronize_rcu();
 305        return qp_inuse;
 306}
 307
 308/**
 309 * rvt_qp_exit - clean up qps on device exit
 310 * @rdi: rvt dev structure
 311 *
 312 * Check for qp leaks and free resources.
 313 */
 314void rvt_qp_exit(struct rvt_dev_info *rdi)
 315{
 316        u32 qps_inuse = rvt_free_all_qps(rdi);
 317
 318        if (qps_inuse)
 319                rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
 320                           qps_inuse);
 321        if (!rdi->qp_dev)
 322                return;
 323
 324        kfree(rdi->qp_dev->qp_table);
 325        free_qpn_table(&rdi->qp_dev->qpn_table);
 326        kfree(rdi->qp_dev);
 327}
 328
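    /* Convert a bitmap (map, offset) position back into its QPN. */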
 329static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
 330                              struct rvt_qpn_map *map, unsigned off)
 331{
 332        return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
 333}
 334
 335/**
 336 * alloc_qpn - Allocate the next available qpn or zero/one for QP type
 337 *             IB_QPT_SMI/IB_QPT_GSI
 338 * @rdi: rvt device info structure
 339 * @qpt: queue pair number table pointer
 340 * @port_num: IB port number, 1 based, comes from core
 341 *
 342 * Return: The queue pair number
 343 */
 344static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 345                     enum ib_qp_type type, u8 port_num)
 346{
 347        u32 i, offset, max_scan, qpn;
 348        struct rvt_qpn_map *map;
 349        int ret;
 350
 351        if (rdi->driver_f.alloc_qpn)
 352                return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num);
 353
 354        if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 355                unsigned n;
 356
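                    /* qpt->flags keeps one bit per special QP (QP0/QP1) per port */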
 357                ret = type == IB_QPT_GSI;
 358                n = 1 << (ret + 2 * (port_num - 1));
 359                spin_lock(&qpt->lock);
 360                if (qpt->flags & n)
 361                        ret = -EINVAL;
 362                else
 363                        qpt->flags |= n;
 364                spin_unlock(&qpt->lock);
 365                goto bail;
 366        }
 367
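            /*
             * Scan the QPN bitmap for a free entry, starting just past the
             * last allocated QPN and stepping by the QoS-scaled increment.
             */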
 368        qpn = qpt->last + qpt->incr;
 369        if (qpn >= RVT_QPN_MAX)
 370                qpn = qpt->incr | ((qpt->last & 1) ^ 1);
 371        /* offset carries bit 0 */
 372        offset = qpn & RVT_BITS_PER_PAGE_MASK;
 373        map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
 374        max_scan = qpt->nmaps - !offset;
 375        for (i = 0;;) {
 376                if (unlikely(!map->page)) {
 377                        get_map_page(qpt, map);
 378                        if (unlikely(!map->page))
 379                                break;
 380                }
 381                do {
 382                        if (!test_and_set_bit(offset, map->page)) {
 383                                qpt->last = qpn;
 384                                ret = qpn;
 385                                goto bail;
 386                        }
 387                        offset += qpt->incr;
 388                        /*
 389                         * This qpn might be bogus if offset >= RVT_BITS_PER_PAGE;
 390                         * that is OK, it gets reassigned below.
 391                         */
 392                        qpn = mk_qpn(qpt, map, offset);
 393                } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
 394                /*
 395                 * In order to keep the number of pages allocated to a
 396                 * minimum, we scan all the existing pages before increasing
 397                 * the size of the bitmap table.
 398                 */
 399                if (++i > max_scan) {
 400                        if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
 401                                break;
 402                        map = &qpt->map[qpt->nmaps++];
 403                        /* start at incr with current bit 0 */
 404                        offset = qpt->incr | (offset & 1);
 405                } else if (map < &qpt->map[qpt->nmaps]) {
 406                        ++map;
 407                        /* start at incr with current bit 0 */
 408                        offset = qpt->incr | (offset & 1);
 409                } else {
 410                        map = &qpt->map[0];
 411                        /* wrap to first map page, invert bit 0 */
 412                        offset = qpt->incr | ((offset & 1) ^ 1);
 413                }
 414                /* there can be no set bits in low-order QoS bits */
 415                WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1));
 416                qpn = mk_qpn(qpt, map, offset);
 417        }
 418
 419        ret = -ENOMEM;
 420
 421bail:
 422        return ret;
 423}
 424
 425/**
 426 * rvt_clear_mr_refs - Drop held mr refs
 427 * @qp: rvt qp data structure
 428 * @clr_sends: Whether to clear the send side or not
 429 */
 430static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
 431{
 432        unsigned n;
 433        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
 434
 435        if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
 436                rvt_put_ss(&qp->s_rdma_read_sge);
 437
 438        rvt_put_ss(&qp->r_sge);
 439
 440        if (clr_sends) {
 441                while (qp->s_last != qp->s_head) {
 442                        struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 443
 444                        rvt_put_swqe(wqe);
 445
 446                        if (qp->ibqp.qp_type == IB_QPT_UD ||
 447                            qp->ibqp.qp_type == IB_QPT_SMI ||
 448                            qp->ibqp.qp_type == IB_QPT_GSI)
 449                                atomic_dec(&ibah_to_rvtah(
 450                                                wqe->ud_wr.ah)->refcount);
 451                        if (++qp->s_last >= qp->s_size)
 452                                qp->s_last = 0;
 453                        smp_wmb(); /* see qp_set_savail */
 454                }
 455                if (qp->s_rdma_mr) {
 456                        rvt_put_mr(qp->s_rdma_mr);
 457                        qp->s_rdma_mr = NULL;
 458                }
 459        }
 460
 461        for (n = 0; qp->s_ack_queue && n < rvt_max_atomic(rdi); n++) {
 462                struct rvt_ack_entry *e = &qp->s_ack_queue[n];
 463
 464                if (e->rdma_sge.mr) {
 465                        rvt_put_mr(e->rdma_sge.mr);
 466                        e->rdma_sge.mr = NULL;
 467                }
 468        }
 469}
 470
 471/**
 472 * rvt_swqe_has_lkey - return true if lkey is used by swqe
 473 * @wqe: the send wqe
 474 * @lkey: the lkey
 475 *
 476 * Test the swqe for using lkey
 477 */
 478static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey)
 479{
 480        int i;
 481
 482        for (i = 0; i < wqe->wr.num_sge; i++) {
 483                struct rvt_sge *sge = &wqe->sg_list[i];
 484
 485                if (rvt_mr_has_lkey(sge->mr, lkey))
 486                        return true;
 487        }
 488        return false;
 489}
 490
 491/**
 492 * rvt_qp_sends_has_lkey - return true if qp sends use lkey
 493 * @qp: the rvt_qp
 494 * @lkey: the lkey
 495 */
 496static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey)
 497{
 498        u32 s_last = qp->s_last;
 499
 500        while (s_last != qp->s_head) {
 501                struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, s_last);
 502
 503                if (rvt_swqe_has_lkey(wqe, lkey))
 504                        return true;
 505
 506                if (++s_last >= qp->s_size)
 507                        s_last = 0;
 508        }
 509        if (qp->s_rdma_mr)
 510                if (rvt_mr_has_lkey(qp->s_rdma_mr, lkey))
 511                        return true;
 512        return false;
 513}
 514
 515/**
 516 * rvt_qp_acks_has_lkey - return true if acks have lkey
 517 * @qp: the qp
 518 * @lkey: the lkey
 519 */
 520static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey)
 521{
 522        int i;
 523        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
 524
 525        for (i = 0; qp->s_ack_queue && i < rvt_max_atomic(rdi); i++) {
 526                struct rvt_ack_entry *e = &qp->s_ack_queue[i];
 527
 528                if (rvt_mr_has_lkey(e->rdma_sge.mr, lkey))
 529                        return true;
 530        }
 531        return false;
 532}
 533
 534/**
 535 * rvt_qp_mr_clean - clean up remote ops for lkey
 536 * @qp: the qp
 537 * @lkey: the lkey that is being de-registered
 538 *
 539 * This routine checks if the lkey is being used by
 540 * the qp.
 541 *
 542 * If so, the qp is put into an error state to eliminate
 543 * any references from the qp.
 544 */
 545void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey)
 546{
 547        bool lastwqe = false;
 548
 549        if (qp->ibqp.qp_type == IB_QPT_SMI ||
 550            qp->ibqp.qp_type == IB_QPT_GSI)
 551                /* avoid special QPs */
 552                return;
 553        spin_lock_irq(&qp->r_lock);
 554        spin_lock(&qp->s_hlock);
 555        spin_lock(&qp->s_lock);
 556
 557        if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
 558                goto check_lwqe;
 559
 560        if (rvt_ss_has_lkey(&qp->r_sge, lkey) ||
 561            rvt_qp_sends_has_lkey(qp, lkey) ||
 562            rvt_qp_acks_has_lkey(qp, lkey))
 563                lastwqe = rvt_error_qp(qp, IB_WC_LOC_PROT_ERR);
 564check_lwqe:
 565        spin_unlock(&qp->s_lock);
 566        spin_unlock(&qp->s_hlock);
 567        spin_unlock_irq(&qp->r_lock);
 568        if (lastwqe) {
 569                struct ib_event ev;
 570
 571                ev.device = qp->ibqp.device;
 572                ev.element.qp = &qp->ibqp;
 573                ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
 574                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
 575        }
 576}
 577
 578/**
 579 * rvt_remove_qp - remove qp from the table
 580 * @rdi: rvt dev struct
 581 * @qp: qp to remove
 582 *
 583 * Remove the QP from the table so it can't be found asynchronously by
 584 * the receive routine.
 585 */
 586static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 587{
 588        struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
 589        u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
 590        unsigned long flags;
 591        int removed = 1;
 592
 593        spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
 594
 595        if (rcu_dereference_protected(rvp->qp[0],
 596                        lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
 597                RCU_INIT_POINTER(rvp->qp[0], NULL);
 598        } else if (rcu_dereference_protected(rvp->qp[1],
 599                        lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
 600                RCU_INIT_POINTER(rvp->qp[1], NULL);
 601        } else {
 602                struct rvt_qp *q;
 603                struct rvt_qp __rcu **qpp;
 604
 605                removed = 0;
 606                qpp = &rdi->qp_dev->qp_table[n];
 607                for (; (q = rcu_dereference_protected(*qpp,
 608                        lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL;
 609                        qpp = &q->next) {
 610                        if (q == qp) {
 611                                RCU_INIT_POINTER(*qpp,
 612                                     rcu_dereference_protected(qp->next,
 613                                     lockdep_is_held(&rdi->qp_dev->qpt_lock)));
 614                                removed = 1;
 615                                trace_rvt_qpremove(qp, n);
 616                                break;
 617                        }
 618                }
 619        }
 620
 621        spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
 622        if (removed) {
 623                synchronize_rcu();
 624                rvt_put_qp(qp);
 625        }
 626}
 627
 628/**
 629 * rvt_init_qp - initialize the QP state to the reset state
 630 * @qp: the QP to init or reinit
 631 * @type: the QP type
 632 *
 633 * This function is called from both rvt_create_qp() and
 634 * rvt_reset_qp().  The difference is that the reset
 635 * path holds the necessary locks to protect against concurrent
 636 * access.
 637 */
 638static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 639                        enum ib_qp_type type)
 640{
 641        qp->remote_qpn = 0;
 642        qp->qkey = 0;
 643        qp->qp_access_flags = 0;
 644        qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
 645        qp->s_hdrwords = 0;
 646        qp->s_wqe = NULL;
 647        qp->s_draining = 0;
 648        qp->s_next_psn = 0;
 649        qp->s_last_psn = 0;
 650        qp->s_sending_psn = 0;
 651        qp->s_sending_hpsn = 0;
 652        qp->s_psn = 0;
 653        qp->r_psn = 0;
 654        qp->r_msn = 0;
 655        if (type == IB_QPT_RC) {
 656                qp->s_state = IB_OPCODE_RC_SEND_LAST;
 657                qp->r_state = IB_OPCODE_RC_SEND_LAST;
 658        } else {
 659                qp->s_state = IB_OPCODE_UC_SEND_LAST;
 660                qp->r_state = IB_OPCODE_UC_SEND_LAST;
 661        }
 662        qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 663        qp->r_nak_state = 0;
 664        qp->r_aflags = 0;
 665        qp->r_flags = 0;
 666        qp->s_head = 0;
 667        qp->s_tail = 0;
 668        qp->s_cur = 0;
 669        qp->s_acked = 0;
 670        qp->s_last = 0;
 671        qp->s_ssn = 1;
 672        qp->s_lsn = 0;
 673        qp->s_mig_state = IB_MIG_MIGRATED;
 674        qp->r_head_ack_queue = 0;
 675        qp->s_tail_ack_queue = 0;
 676        qp->s_num_rd_atomic = 0;
 677        if (qp->r_rq.wq) {
 678                qp->r_rq.wq->head = 0;
 679                qp->r_rq.wq->tail = 0;
 680        }
 681        qp->r_sge.num_sge = 0;
 682        atomic_set(&qp->s_reserved_used, 0);
 683}
 684
 685/**
 686 * rvt_reset_qp - initialize the QP state to the reset state
 687 * @qp: the QP to reset
 688 * @type: the QP type
 689 *
 690 * r_lock, s_hlock, and s_lock are required to be held by the caller
 691 */
 692static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 693                         enum ib_qp_type type)
 694        __must_hold(&qp->s_lock)
 695        __must_hold(&qp->s_hlock)
 696        __must_hold(&qp->r_lock)
 697{
 698        lockdep_assert_held(&qp->r_lock);
 699        lockdep_assert_held(&qp->s_hlock);
 700        lockdep_assert_held(&qp->s_lock);
 701        if (qp->state != IB_QPS_RESET) {
 702                qp->state = IB_QPS_RESET;
 703
 704                /* Let drivers flush their waitlist */
 705                rdi->driver_f.flush_qp_waiters(qp);
 706                rvt_stop_rc_timers(qp);
 707                qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
 708                spin_unlock(&qp->s_lock);
 709                spin_unlock(&qp->s_hlock);
 710                spin_unlock_irq(&qp->r_lock);
 711
 712                /* Stop the send queue and the retry timer */
 713                rdi->driver_f.stop_send_queue(qp);
 714                rvt_del_timers_sync(qp);
 715                /* Wait for things to stop */
 716                rdi->driver_f.quiesce_qp(qp);
 717
 718                /* take qp out the hash and wait for it to be unused */
 719                rvt_remove_qp(rdi, qp);
 720
 721                /* grab the lock b/c it was locked at call time */
 722                spin_lock_irq(&qp->r_lock);
 723                spin_lock(&qp->s_hlock);
 724                spin_lock(&qp->s_lock);
 725
 726                rvt_clear_mr_refs(qp, 1);
 727                /*
 728                 * Let the driver do any tear down or re-init it needs to for
 729                 * a qp that has been reset
 730                 */
 731                rdi->driver_f.notify_qp_reset(qp);
 732        }
 733        rvt_init_qp(rdi, qp, type);
 734        lockdep_assert_held(&qp->r_lock);
 735        lockdep_assert_held(&qp->s_hlock);
 736        lockdep_assert_held(&qp->s_lock);
 737}
 738
 739/** rvt_free_qpn - Free a qpn from the bit map
 740 * @qpt: QP table
 741 * @qpn: queue pair number to free
 742 */
 743static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
 744{
 745        struct rvt_qpn_map *map;
 746
 747        map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE;
 748        if (map->page)
 749                clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
 750}
 751
 752/**
 753 * rvt_create_qp - create a queue pair for a device
 754 * @ibpd: the protection domain whose device we create the queue pair for
 755 * @init_attr: the attributes of the queue pair
 756 * @udata: user data for libibverbs.so
 757 *
 758 * Queue pair creation is mostly an rvt issue. However, drivers have their own
 759 * unique idea of what queue pair numbers mean. For instance there is a reserved
 760 * range for PSM.
 761 *
 762 * Return: the queue pair on success, otherwise returns an errno.
 763 *
 764 * Called by the ib_create_qp() core verbs function.
 765 */
 766struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 767                            struct ib_qp_init_attr *init_attr,
 768                            struct ib_udata *udata)
 769{
 770        struct rvt_qp *qp;
 771        int err;
 772        struct rvt_swqe *swq = NULL;
 773        size_t sz;
 774        size_t sg_list_sz;
 775        struct ib_qp *ret = ERR_PTR(-ENOMEM);
 776        struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
 777        void *priv = NULL;
 778        size_t sqsize;
 779
 780        if (!rdi)
 781                return ERR_PTR(-EINVAL);
 782
 783        if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge ||
 784            init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
 785            init_attr->create_flags)
 786                return ERR_PTR(-EINVAL);
 787
 788        /* Check receive queue parameters if no SRQ is specified. */
 789        if (!init_attr->srq) {
 790                if (init_attr->cap.max_recv_sge >
 791                    rdi->dparms.props.max_recv_sge ||
 792                    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
 793                        return ERR_PTR(-EINVAL);
 794
 795                if (init_attr->cap.max_send_sge +
 796                    init_attr->cap.max_send_wr +
 797                    init_attr->cap.max_recv_sge +
 798                    init_attr->cap.max_recv_wr == 0)
 799                        return ERR_PTR(-EINVAL);
 800        }
 801        sqsize =
 802                init_attr->cap.max_send_wr + 1 +
 803                rdi->dparms.reserved_operations;
 804        switch (init_attr->qp_type) {
 805        case IB_QPT_SMI:
 806        case IB_QPT_GSI:
 807                if (init_attr->port_num == 0 ||
 808                    init_attr->port_num > ibpd->device->phys_port_cnt)
 809                        return ERR_PTR(-EINVAL);
 810                /* fall through */
 811        case IB_QPT_UC:
 812        case IB_QPT_RC:
 813        case IB_QPT_UD:
 814                sz = sizeof(struct rvt_sge) *
 815                        init_attr->cap.max_send_sge +
 816                        sizeof(struct rvt_swqe);
 817                swq = vzalloc_node(array_size(sz, sqsize), rdi->dparms.node);
 818                if (!swq)
 819                        return ERR_PTR(-ENOMEM);
 820
 821                sz = sizeof(*qp);
 822                sg_list_sz = 0;
 823                if (init_attr->srq) {
 824                        struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
 825
 826                        if (srq->rq.max_sge > 1)
 827                                sg_list_sz = sizeof(*qp->r_sg_list) *
 828                                        (srq->rq.max_sge - 1);
 829                } else if (init_attr->cap.max_recv_sge > 1)
 830                        sg_list_sz = sizeof(*qp->r_sg_list) *
 831                                (init_attr->cap.max_recv_sge - 1);
 832                qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL,
 833                                  rdi->dparms.node);
 834                if (!qp)
 835                        goto bail_swq;
 836
 837                RCU_INIT_POINTER(qp->next, NULL);
 838                if (init_attr->qp_type == IB_QPT_RC) {
 839                        qp->s_ack_queue =
 840                                kcalloc_node(rvt_max_atomic(rdi),
 841                                             sizeof(*qp->s_ack_queue),
 842                                             GFP_KERNEL,
 843                                             rdi->dparms.node);
 844                        if (!qp->s_ack_queue)
 845                                goto bail_qp;
 846                }
 847                /* initialize timers needed for rc qp */
 848                timer_setup(&qp->s_timer, rvt_rc_timeout, 0);
 849                hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC,
 850                             HRTIMER_MODE_REL);
 851                qp->s_rnr_timer.function = rvt_rc_rnr_retry;
 852
 853                /*
 854                 * Driver needs to set up its private QP structure and do any
 855                 * initialization that is needed.
 856                 */
 857                priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
 858                if (IS_ERR(priv)) {
 859                        ret = priv;
 860                        goto bail_qp;
 861                }
 862                qp->priv = priv;
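                    /* IBTA: local ACK timeout = 4.096 usec * 2^timeout */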
 863                qp->timeout_jiffies =
 864                        usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
 865                                1000UL);
 866                if (init_attr->srq) {
 867                        sz = 0;
 868                } else {
 869                        qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
 870                        qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
 871                        sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
 872                                sizeof(struct rvt_rwqe);
 873                        if (udata)
 874                                qp->r_rq.wq = vmalloc_user(
 875                                                sizeof(struct rvt_rwq) +
 876                                                qp->r_rq.size * sz);
 877                        else
 878                                qp->r_rq.wq = vzalloc_node(
 879                                                sizeof(struct rvt_rwq) +
 880                                                qp->r_rq.size * sz,
 881                                                rdi->dparms.node);
 882                        if (!qp->r_rq.wq)
 883                                goto bail_driver_priv;
 884                }
 885
 886                /*
 887                 * ib_create_qp() will initialize qp->ibqp
 888                 * except for qp->ibqp.qp_num.
 889                 */
 890                spin_lock_init(&qp->r_lock);
 891                spin_lock_init(&qp->s_hlock);
 892                spin_lock_init(&qp->s_lock);
 893                spin_lock_init(&qp->r_rq.lock);
 894                atomic_set(&qp->refcount, 0);
 895                atomic_set(&qp->local_ops_pending, 0);
 896                init_waitqueue_head(&qp->wait);
 897                INIT_LIST_HEAD(&qp->rspwait);
 898                qp->state = IB_QPS_RESET;
 899                qp->s_wq = swq;
 900                qp->s_size = sqsize;
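                    /* s_avail excludes reserved operations and the unused slot */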
 901                qp->s_avail = init_attr->cap.max_send_wr;
 902                qp->s_max_sge = init_attr->cap.max_send_sge;
 903                if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
 904                        qp->s_flags = RVT_S_SIGNAL_REQ_WR;
 905
 906                err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
 907                                init_attr->qp_type,
 908                                init_attr->port_num);
 909                if (err < 0) {
 910                        ret = ERR_PTR(err);
 911                        goto bail_rq_wq;
 912                }
 913                qp->ibqp.qp_num = err;
 914                qp->port_num = init_attr->port_num;
 915                rvt_init_qp(rdi, qp, init_attr->qp_type);
 916                break;
 917
 918        default:
 919                /* Don't support raw QPs */
 920                return ERR_PTR(-EINVAL);
 921        }
 922
 923        init_attr->cap.max_inline_data = 0;
 924
 925        /*
 926         * Return the address of the RWQ as the offset to mmap.
 927         * See rvt_mmap() for details.
 928         */
 929        if (udata && udata->outlen >= sizeof(__u64)) {
 930                if (!qp->r_rq.wq) {
 931                        __u64 offset = 0;
 932
 933                        err = ib_copy_to_udata(udata, &offset,
 934                                               sizeof(offset));
 935                        if (err) {
 936                                ret = ERR_PTR(err);
 937                                goto bail_qpn;
 938                        }
 939                } else {
 940                        u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
 941
 942                        qp->ip = rvt_create_mmap_info(rdi, s,
 943                                                      ibpd->uobject->context,
 944                                                      qp->r_rq.wq);
 945                        if (!qp->ip) {
 946                                ret = ERR_PTR(-ENOMEM);
 947                                goto bail_qpn;
 948                        }
 949
 950                        err = ib_copy_to_udata(udata, &qp->ip->offset,
 951                                               sizeof(qp->ip->offset));
 952                        if (err) {
 953                                ret = ERR_PTR(err);
 954                                goto bail_ip;
 955                        }
 956                }
 957                qp->pid = current->pid;
 958        }
 959
 960        spin_lock(&rdi->n_qps_lock);
 961        if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
 962                spin_unlock(&rdi->n_qps_lock);
 963                ret = ERR_PTR(-ENOMEM);
 964                goto bail_ip;
 965        }
 966
 967        rdi->n_qps_allocated++;
 968        /*
 969         * Maintain a busy_jiffies variable that will be added to the timeout
 970         * period in mod_retry_timer and add_retry_timer. This busy jiffies
 971         * is scaled by the number of rc qps created for the device to reduce
 972         * the number of timeouts occurring when there is a large number of
 973         * qps. busy_jiffies is incremented every rc qp scaling interval.
 974         * The scaling interval is selected based on extensive performance
 975         * evaluation of targeted workloads.
 976         */
 977        if (init_attr->qp_type == IB_QPT_RC) {
 978                rdi->n_rc_qps++;
 979                rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
 980        }
 981        spin_unlock(&rdi->n_qps_lock);
 982
 983        if (qp->ip) {
 984                spin_lock_irq(&rdi->pending_lock);
 985                list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
 986                spin_unlock_irq(&rdi->pending_lock);
 987        }
 988
 989        ret = &qp->ibqp;
 990
 991        /*
 992         * We have our QP and it's good, now keep track of what types of opcodes
 993         * can be processed on this QP. We do this by keeping track of what the
 994         * 3 high order bits of the opcode are.
 995         */
 996        switch (init_attr->qp_type) {
 997        case IB_QPT_SMI:
 998        case IB_QPT_GSI:
 999        case IB_QPT_UD:
1000                qp->allowed_ops = IB_OPCODE_UD;
1001                break;
1002        case IB_QPT_RC:
1003                qp->allowed_ops = IB_OPCODE_RC;
1004                break;
1005        case IB_QPT_UC:
1006                qp->allowed_ops = IB_OPCODE_UC;
1007                break;
1008        default:
1009                ret = ERR_PTR(-EINVAL);
1010                goto bail_ip;
1011        }
1012
1013        return ret;
1014
1015bail_ip:
1016        if (qp->ip)
1017                kref_put(&qp->ip->ref, rvt_release_mmap_info);
1018
1019bail_qpn:
1020        rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
1021
1022bail_rq_wq:
1023        if (!qp->ip)
1024                vfree(qp->r_rq.wq);
1025
1026bail_driver_priv:
1027        rdi->driver_f.qp_priv_free(rdi, qp);
1028
1029bail_qp:
1030        kfree(qp->s_ack_queue);
1031        kfree(qp);
1032
1033bail_swq:
1034        vfree(swq);
1035
1036        return ret;
1037}
1038
1039/**
1040 * rvt_error_qp - put a QP into the error state
1041 * @qp: the QP to put into the error state
1042 * @err: the receive completion error to signal if a RWQE is active
1043 *
1044 * Flushes both send and receive work queues.
1045 *
1046 * Return: true if last WQE event should be generated.
1047 * The QP r_lock and s_lock should be held and interrupts disabled.
1048 * If we are already in error state, just return.
1049 */
1050int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
1051{
1052        struct ib_wc wc;
1053        int ret = 0;
1054        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
1055
1056        lockdep_assert_held(&qp->r_lock);
1057        lockdep_assert_held(&qp->s_lock);
1058        if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
1059                goto bail;
1060
1061        qp->state = IB_QPS_ERR;
1062
1063        if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1064                qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1065                del_timer(&qp->s_timer);
1066        }
1067
1068        if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
1069                qp->s_flags &= ~RVT_S_ANY_WAIT_SEND;
1070
1071        rdi->driver_f.notify_error_qp(qp);
1072
1073        /* Schedule the sending tasklet to drain the send work queue. */
1074        if (READ_ONCE(qp->s_last) != qp->s_head)
1075                rdi->driver_f.schedule_send(qp);
1076
1077        rvt_clear_mr_refs(qp, 0);
1078
1079        memset(&wc, 0, sizeof(wc));
1080        wc.qp = &qp->ibqp;
1081        wc.opcode = IB_WC_RECV;
1082
1083        if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) {
1084                wc.wr_id = qp->r_wr_id;
1085                wc.status = err;
1086                rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1087        }
1088        wc.status = IB_WC_WR_FLUSH_ERR;
1089
1090        if (qp->r_rq.wq) {
1091                struct rvt_rwq *wq;
1092                u32 head;
1093                u32 tail;
1094
1095                spin_lock(&qp->r_rq.lock);
1096
1097                /* sanity check pointers before trusting them */
1098                wq = qp->r_rq.wq;
1099                head = wq->head;
1100                if (head >= qp->r_rq.size)
1101                        head = 0;
1102                tail = wq->tail;
1103                if (tail >= qp->r_rq.size)
1104                        tail = 0;
1105                while (tail != head) {
1106                        wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
1107                        if (++tail >= qp->r_rq.size)
1108                                tail = 0;
1109                        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1110                }
1111                wq->tail = tail;
1112
1113                spin_unlock(&qp->r_rq.lock);
1114        } else if (qp->ibqp.event_handler) {
1115                ret = 1;
1116        }
1117
1118bail:
1119        return ret;
1120}
1121EXPORT_SYMBOL(rvt_error_qp);
1122
1123/*
1124 * Put the QP into the hash table.
1125 * The hash table holds a reference to the QP.
1126 */
1127static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
1128{
1129        struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
1130        unsigned long flags;
1131
1132        rvt_get_qp(qp);
1133        spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
1134
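            /* QP0 and QP1 live per port in rvp->qp[], not in the hash table */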
1135        if (qp->ibqp.qp_num <= 1) {
1136                rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp);
1137        } else {
1138                u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
1139
1140                qp->next = rdi->qp_dev->qp_table[n];
1141                rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp);
1142                trace_rvt_qpinsert(qp, n);
1143        }
1144
1145        spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
1146}
1147
1148/**
1149 * rvt_modify_qp - modify the attributes of a queue pair
1150 * @ibqp: the queue pair whose attributes we're modifying
1151 * @attr: the new attributes
1152 * @attr_mask: the mask of attributes to modify
1153 * @udata: user data for libibverbs.so
1154 *
1155 * Return: 0 on success, otherwise returns an errno.
1156 */
1157int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1158                  int attr_mask, struct ib_udata *udata)
1159{
1160        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1161        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1162        enum ib_qp_state cur_state, new_state;
1163        struct ib_event ev;
1164        int lastwqe = 0;
1165        int mig = 0;
1166        int pmtu = 0; /* for gcc warning only */
1167        enum rdma_link_layer link;
1168        int opa_ah;
1169
1170        link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
1171
1172        spin_lock_irq(&qp->r_lock);
1173        spin_lock(&qp->s_hlock);
1174        spin_lock(&qp->s_lock);
1175
1176        cur_state = attr_mask & IB_QP_CUR_STATE ?
1177                attr->cur_qp_state : qp->state;
1178        new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1179        opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num);
1180
1181        if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1182                                attr_mask, link))
1183                goto inval;
1184
1185        if (rdi->driver_f.check_modify_qp &&
1186            rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
1187                goto inval;
1188
1189        if (attr_mask & IB_QP_AV) {
1190                if (opa_ah) {
1191                        if (rdma_ah_get_dlid(&attr->ah_attr) >=
1192                                opa_get_mcast_base(OPA_MCAST_NR))
1193                                goto inval;
1194                } else {
1195                        if (rdma_ah_get_dlid(&attr->ah_attr) >=
1196                                be16_to_cpu(IB_MULTICAST_LID_BASE))
1197                                goto inval;
1198                }
1199
1200                if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
1201                        goto inval;
1202        }
1203
1204        if (attr_mask & IB_QP_ALT_PATH) {
1205                if (opa_ah) {
1206                        if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
1207                                opa_get_mcast_base(OPA_MCAST_NR))
1208                                goto inval;
1209                } else {
1210                        if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
1211                                be16_to_cpu(IB_MULTICAST_LID_BASE))
1212                                goto inval;
1213                }
1214
1215                if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
1216                        goto inval;
1217                if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
1218                        goto inval;
1219        }
1220
1221        if (attr_mask & IB_QP_PKEY_INDEX)
1222                if (attr->pkey_index >= rvt_get_npkeys(rdi))
1223                        goto inval;
1224
1225        if (attr_mask & IB_QP_MIN_RNR_TIMER)
1226                if (attr->min_rnr_timer > 31)
1227                        goto inval;
1228
1229        if (attr_mask & IB_QP_PORT)
1230                if (qp->ibqp.qp_type == IB_QPT_SMI ||
1231                    qp->ibqp.qp_type == IB_QPT_GSI ||
1232                    attr->port_num == 0 ||
1233                    attr->port_num > ibqp->device->phys_port_cnt)
1234                        goto inval;
1235
1236        if (attr_mask & IB_QP_DEST_QPN)
1237                if (attr->dest_qp_num > RVT_QPN_MASK)
1238                        goto inval;
1239
1240        if (attr_mask & IB_QP_RETRY_CNT)
1241                if (attr->retry_cnt > 7)
1242                        goto inval;
1243
1244        if (attr_mask & IB_QP_RNR_RETRY)
1245                if (attr->rnr_retry > 7)
1246                        goto inval;
1247
1248        /*
1249         * Don't allow invalid path_mtu values.  OK to set greater
1250         * than the active mtu (or even the max_cap, if we have tuned
1251         * that to a small mtu).  We'll set qp->path_mtu
1252         * to the lesser of requested attribute mtu and active,
1253         * for packetizing messages.
1254         * Note that the QP port has to be set in INIT and MTU in RTR.
1255         */
1256        if (attr_mask & IB_QP_PATH_MTU) {
1257                pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr);
1258                if (pmtu < 0)
1259                        goto inval;
1260        }
1261
1262        if (attr_mask & IB_QP_PATH_MIG_STATE) {
1263                if (attr->path_mig_state == IB_MIG_REARM) {
1264                        if (qp->s_mig_state == IB_MIG_ARMED)
1265                                goto inval;
1266                        if (new_state != IB_QPS_RTS)
1267                                goto inval;
1268                } else if (attr->path_mig_state == IB_MIG_MIGRATED) {
1269                        if (qp->s_mig_state == IB_MIG_REARM)
1270                                goto inval;
1271                        if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
1272                                goto inval;
1273                        if (qp->s_mig_state == IB_MIG_ARMED)
1274                                mig = 1;
1275                } else {
1276                        goto inval;
1277                }
1278        }
1279
1280        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1281                if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic)
1282                        goto inval;
1283
1284        switch (new_state) {
1285        case IB_QPS_RESET:
1286                if (qp->state != IB_QPS_RESET)
1287                        rvt_reset_qp(rdi, qp, ibqp->qp_type);
1288                break;
1289
1290        case IB_QPS_RTR:
1291                /* Allow event to re-trigger if QP set to RTR more than once */
1292                qp->r_flags &= ~RVT_R_COMM_EST;
1293                qp->state = new_state;
1294                break;
1295
1296        case IB_QPS_SQD:
1297                qp->s_draining = qp->s_last != qp->s_cur;
1298                qp->state = new_state;
1299                break;
1300
1301        case IB_QPS_SQE:
1302                if (qp->ibqp.qp_type == IB_QPT_RC)
1303                        goto inval;
1304                qp->state = new_state;
1305                break;
1306
1307        case IB_QPS_ERR:
1308                lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1309                break;
1310
1311        default:
1312                qp->state = new_state;
1313                break;
1314        }
1315
1316        if (attr_mask & IB_QP_PKEY_INDEX)
1317                qp->s_pkey_index = attr->pkey_index;
1318
1319        if (attr_mask & IB_QP_PORT)
1320                qp->port_num = attr->port_num;
1321
1322        if (attr_mask & IB_QP_DEST_QPN)
1323                qp->remote_qpn = attr->dest_qp_num;
1324
1325        if (attr_mask & IB_QP_SQ_PSN) {
1326                qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask;
1327                qp->s_psn = qp->s_next_psn;
1328                qp->s_sending_psn = qp->s_next_psn;
1329                qp->s_last_psn = qp->s_next_psn - 1;
1330                qp->s_sending_hpsn = qp->s_last_psn;
1331        }
1332
1333        if (attr_mask & IB_QP_RQ_PSN)
1334                qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask;
1335
1336        if (attr_mask & IB_QP_ACCESS_FLAGS)
1337                qp->qp_access_flags = attr->qp_access_flags;
1338
1339        if (attr_mask & IB_QP_AV) {
1340                rdma_replace_ah_attr(&qp->remote_ah_attr, &attr->ah_attr);
1341                qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
1342                qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
1343        }
1344
1345        if (attr_mask & IB_QP_ALT_PATH) {
1346                rdma_replace_ah_attr(&qp->alt_ah_attr, &attr->alt_ah_attr);
1347                qp->s_alt_pkey_index = attr->alt_pkey_index;
1348        }
1349
1350        if (attr_mask & IB_QP_PATH_MIG_STATE) {
1351                qp->s_mig_state = attr->path_mig_state;
1352                if (mig) {
1353                        qp->remote_ah_attr = qp->alt_ah_attr;
1354                        qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
1355                        qp->s_pkey_index = qp->s_alt_pkey_index;
1356                }
1357        }
1358
1359        if (attr_mask & IB_QP_PATH_MTU) {
1360                qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
1361                qp->log_pmtu = ilog2(qp->pmtu);
1362        }
1363
1364        if (attr_mask & IB_QP_RETRY_CNT) {
1365                qp->s_retry_cnt = attr->retry_cnt;
1366                qp->s_retry = attr->retry_cnt;
1367        }
1368
1369        if (attr_mask & IB_QP_RNR_RETRY) {
1370                qp->s_rnr_retry_cnt = attr->rnr_retry;
1371                qp->s_rnr_retry = attr->rnr_retry;
1372        }
1373
1374        if (attr_mask & IB_QP_MIN_RNR_TIMER)
1375                qp->r_min_rnr_timer = attr->min_rnr_timer;
1376
1377        if (attr_mask & IB_QP_TIMEOUT) {
1378                qp->timeout = attr->timeout;
1379                qp->timeout_jiffies = rvt_timeout_to_jiffies(qp->timeout);
1380        }
1381
1382        if (attr_mask & IB_QP_QKEY)
1383                qp->qkey = attr->qkey;
1384
1385        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1386                qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
1387
1388        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
1389                qp->s_max_rd_atomic = attr->max_rd_atomic;
1390
1391        if (rdi->driver_f.modify_qp)
1392                rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);
1393
1394        spin_unlock(&qp->s_lock);
1395        spin_unlock(&qp->s_hlock);
1396        spin_unlock_irq(&qp->r_lock);
1397
1398        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1399                rvt_insert_qp(rdi, qp);
1400
1401        if (lastwqe) {
1402                ev.device = qp->ibqp.device;
1403                ev.element.qp = &qp->ibqp;
1404                ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1405                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1406        }
1407        if (mig) {
1408                ev.device = qp->ibqp.device;
1409                ev.element.qp = &qp->ibqp;
1410                ev.event = IB_EVENT_PATH_MIG;
1411                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1412        }
1413        return 0;
1414
1415inval:
1416        spin_unlock(&qp->s_lock);
1417        spin_unlock(&qp->s_hlock);
1418        spin_unlock_irq(&qp->r_lock);
1419        return -EINVAL;
1420}
1421
1422/**
1423 * rvt_destroy_qp - destroy a queue pair
1424 * @ibqp: the queue pair to destroy
1425 *
1426 * Note that this can be called while the QP is actively sending or
1427 * receiving!
1428 *
1429 * Return: 0 on success.
1430 */
1431int rvt_destroy_qp(struct ib_qp *ibqp)
1432{
1433        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1434        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1435
1436        spin_lock_irq(&qp->r_lock);
1437        spin_lock(&qp->s_hlock);
1438        spin_lock(&qp->s_lock);
1439        rvt_reset_qp(rdi, qp, ibqp->qp_type);
1440        spin_unlock(&qp->s_lock);
1441        spin_unlock(&qp->s_hlock);
1442        spin_unlock_irq(&qp->r_lock);
1443
1444        wait_event(qp->wait, !atomic_read(&qp->refcount));
1445        /* qpn is now available for use again */
1446        rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
1447
1448        spin_lock(&rdi->n_qps_lock);
1449        rdi->n_qps_allocated--;
1450        if (qp->ibqp.qp_type == IB_QPT_RC) {
1451                rdi->n_rc_qps--;
1452                rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
1453        }
1454        spin_unlock(&rdi->n_qps_lock);
1455
1456        if (qp->ip)
1457                kref_put(&qp->ip->ref, rvt_release_mmap_info);
1458        else
1459                vfree(qp->r_rq.wq);
1460        vfree(qp->s_wq);
1461        rdi->driver_f.qp_priv_free(rdi, qp);
1462        kfree(qp->s_ack_queue);
1463        rdma_destroy_ah_attr(&qp->remote_ah_attr);
1464        rdma_destroy_ah_attr(&qp->alt_ah_attr);
1465        kfree(qp);
1466        return 0;
1467}
1468
1469/**
1470 * rvt_query_qp - query an ibqp
1471 * @ibqp: IB qp to query
1472 * @attr: attr struct to fill in
1473 * @attr_mask: attr mask ignored
1474 * @init_attr: struct to fill in
1475 *
1476 * Return: always 0
1477 */
1478int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1479                 int attr_mask, struct ib_qp_init_attr *init_attr)
1480{
1481        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1482        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1483
1484        attr->qp_state = qp->state;
1485        attr->cur_qp_state = attr->qp_state;
1486        attr->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
1487        attr->path_mig_state = qp->s_mig_state;
1488        attr->qkey = qp->qkey;
1489        attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask;
1490        attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask;
1491        attr->dest_qp_num = qp->remote_qpn;
1492        attr->qp_access_flags = qp->qp_access_flags;
1493        attr->cap.max_send_wr = qp->s_size - 1 -
1494                rdi->dparms.reserved_operations;
1495        attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
1496        attr->cap.max_send_sge = qp->s_max_sge;
1497        attr->cap.max_recv_sge = qp->r_rq.max_sge;
1498        attr->cap.max_inline_data = 0;
1499        attr->ah_attr = qp->remote_ah_attr;
1500        attr->alt_ah_attr = qp->alt_ah_attr;
1501        attr->pkey_index = qp->s_pkey_index;
1502        attr->alt_pkey_index = qp->s_alt_pkey_index;
1503        attr->en_sqd_async_notify = 0;
1504        attr->sq_draining = qp->s_draining;
1505        attr->max_rd_atomic = qp->s_max_rd_atomic;
1506        attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
1507        attr->min_rnr_timer = qp->r_min_rnr_timer;
1508        attr->port_num = qp->port_num;
1509        attr->timeout = qp->timeout;
1510        attr->retry_cnt = qp->s_retry_cnt;
1511        attr->rnr_retry = qp->s_rnr_retry_cnt;
1512        attr->alt_port_num =
1513                rdma_ah_get_port_num(&qp->alt_ah_attr);
1514        attr->alt_timeout = qp->alt_timeout;
1515
1516        init_attr->event_handler = qp->ibqp.event_handler;
1517        init_attr->qp_context = qp->ibqp.qp_context;
1518        init_attr->send_cq = qp->ibqp.send_cq;
1519        init_attr->recv_cq = qp->ibqp.recv_cq;
1520        init_attr->srq = qp->ibqp.srq;
1521        init_attr->cap = attr->cap;
1522        if (qp->s_flags & RVT_S_SIGNAL_REQ_WR)
1523                init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
1524        else
1525                init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1526        init_attr->qp_type = qp->ibqp.qp_type;
1527        init_attr->port_num = qp->port_num;
1528        return 0;
1529}
1530
1531/**
1532 * rvt_post_recv - post a receive on a QP
1533 * @ibqp: the QP to post the receive on
1534 * @wr: the WR to post
1535 * @bad_wr: the first bad WR is put here
1536 *
1537 * This may be called from interrupt context.
1538 *
1539 * Return: 0 on success otherwise errno
1540 */
1541int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1542                  const struct ib_recv_wr **bad_wr)
1543{
1544        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1545        struct rvt_rwq *wq = qp->r_rq.wq;
1546        unsigned long flags;
1547        int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
1548                                !qp->ibqp.srq;
1549
1550        /* Check that state is OK to post receive. */
1551        if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) {
1552                *bad_wr = wr;
1553                return -EINVAL;
1554        }
1555
1556        for (; wr; wr = wr->next) {
1557                struct rvt_rwqe *wqe;
1558                u32 next;
1559                int i;
1560
1561                if ((unsigned)wr->num_sge > qp->r_rq.max_sge) {
1562                        *bad_wr = wr;
1563                        return -EINVAL;
1564                }
1565
1566                spin_lock_irqsave(&qp->r_rq.lock, flags);
1567                next = wq->head + 1;
1568                if (next >= qp->r_rq.size)
1569                        next = 0;
1570                if (next == wq->tail) {
1571                        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1572                        *bad_wr = wr;
1573                        return -ENOMEM;
1574                }
1575                if (unlikely(qp_err_flush)) {
1576                        struct ib_wc wc;
1577
1578                        memset(&wc, 0, sizeof(wc));
1579                        wc.qp = &qp->ibqp;
1580                        wc.opcode = IB_WC_RECV;
1581                        wc.wr_id = wr->wr_id;
1582                        wc.status = IB_WC_WR_FLUSH_ERR;
1583                        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1584                } else {
1585                        wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
1586                        wqe->wr_id = wr->wr_id;
1587                        wqe->num_sge = wr->num_sge;
1588                        for (i = 0; i < wr->num_sge; i++)
1589                                wqe->sg_list[i] = wr->sg_list[i];
1590                        /*
1591                         * Make sure queue entry is written
1592                         * before the head index.
1593                         */
1594                        smp_wmb();
1595                        wq->head = next;
1596                }
1597                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1598        }
1599        return 0;
1600}
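
/*
 * Illustrative sketch (not part of this file): rvt_post_recv() is reached
 * via ib_post_recv() from the ULP.  A minimal single-SGE receive post,
 * with dma_addr/len/mr_lkey/ctx standing in for caller-provided values,
 * might look like:
 *
 *	struct ib_sge sge = {
 *		.addr   = dma_addr,
 *		.length = len,
 *		.lkey   = mr_lkey,
 *	};
 *	struct ib_recv_wr wr = {
 *		.wr_id   = (u64)(uintptr_t)ctx,
 *		.sg_list = &sge,
 *		.num_sge = 1,
 *	};
 *	const struct ib_recv_wr *bad_wr;
 *	int ret = ib_post_recv(qp, &wr, &bad_wr);
 *
 * -ENOMEM from the loop above means the receive ring is full; -EINVAL
 * means the QP state or the SGE count was not acceptable.
 */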
1601
1602/**
1603 * rvt_qp_valid_operation - validate post send wr request
1604 * @qp - the qp
1605 * @post_parms - the post send table for the driver
1606 * @wr - the work request
1607 *
1608 * The routine validates the operation based on the
1609 * validation table and returns the length of the operation,
1610 * which can extend beyond the ib_send_wr.  Operation-
1611 * dependent flags control the atomic operation validation.
1612 *
1613 * There is an exception for UD qps that validates the pd and
1614 * overrides the length to include the additional UD specific
1615 * length.
1616 *
1617 * Returns a negative error or the length of the work request
1618 * for building the swqe.
1619 */
1620static inline int rvt_qp_valid_operation(
1621        struct rvt_qp *qp,
1622        const struct rvt_operation_params *post_parms,
1623        const struct ib_send_wr *wr)
1624{
1625        int len;
1626
1627        if (wr->opcode >= RVT_OPERATION_MAX || !post_parms[wr->opcode].length)
1628                return -EINVAL;
1629        if (!(post_parms[wr->opcode].qpt_support & BIT(qp->ibqp.qp_type)))
1630                return -EINVAL;
1631        if ((post_parms[wr->opcode].flags & RVT_OPERATION_PRIV) &&
1632            ibpd_to_rvtpd(qp->ibqp.pd)->user)
1633                return -EINVAL;
1634        if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC_SGE &&
1635            (wr->num_sge == 0 ||
1636             wr->sg_list[0].length < sizeof(u64) ||
1637             wr->sg_list[0].addr & (sizeof(u64) - 1)))
1638                return -EINVAL;
1639        if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC &&
1640            !qp->s_max_rd_atomic)
1641                return -EINVAL;
1642        len = post_parms[wr->opcode].length;
1643        /* UD specific */
1644        if (qp->ibqp.qp_type != IB_QPT_UC &&
1645            qp->ibqp.qp_type != IB_QPT_RC) {
1646                if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
1647                        return -EINVAL;
1648                len = sizeof(struct ib_ud_wr);
1649        }
1650        return len;
1651}
1652
1653/**
1654 * rvt_qp_is_avail - determine queue capacity
1655 * @qp: the qp
1656 * @rdi: the rdmavt device
1657 * @reserved_op: is reserved operation
1658 *
1659 * This assumes the s_hlock is held but the s_last
1660 * qp variable is uncontrolled.
1661 *
1662 * For non reserved operations, the qp->s_avail
1663 * may be changed.
1664 *
1665 * The return value is zero or -ENOMEM.
1666 */
1667static inline int rvt_qp_is_avail(
1668        struct rvt_qp *qp,
1669        struct rvt_dev_info *rdi,
1670        bool reserved_op)
1671{
1672        u32 slast;
1673        u32 avail;
1674        u32 reserved_used;
1675
1676        /* see rvt_qp_wqe_unreserve() */
1677        smp_mb__before_atomic();
1678        reserved_used = atomic_read(&qp->s_reserved_used);
1679        if (unlikely(reserved_op)) {
1680                /* see rvt_qp_wqe_unreserve() */
1681                smp_mb__before_atomic();
1682                if (reserved_used >= rdi->dparms.reserved_operations)
1683                        return -ENOMEM;
1684                return 0;
1685        }
1686        /* non-reserved operations */
1687        if (likely(qp->s_avail))
1688                return 0;
1689        slast = READ_ONCE(qp->s_last);
1690        if (qp->s_head >= slast)
1691                avail = qp->s_size - (qp->s_head - slast);
1692        else
1693                avail = slast - qp->s_head;
1694
1695        /* see rvt_qp_wqe_unreserve() */
1696        smp_mb__before_atomic();
1697        reserved_used = atomic_read(&qp->s_reserved_used);
1698        avail =  avail - 1 -
1699                (rdi->dparms.reserved_operations - reserved_used);
1700        /* ensure we don't assign a negative s_avail */
1701        if ((s32)avail <= 0)
1702                return -ENOMEM;
1703        qp->s_avail = avail;
1704        if (WARN_ON(qp->s_avail >
1705                    (qp->s_size - 1 - rdi->dparms.reserved_operations)))
1706                rvt_pr_err(rdi,
1707                           "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
1708                           qp->ibqp.qp_num, qp->s_size, qp->s_avail,
1709                           qp->s_head, qp->s_tail, qp->s_cur,
1710                           qp->s_acked, qp->s_last);
1711        return 0;
1712}
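
/*
 * Worked example for the availability computation above (numbers are
 * illustrative only): with s_size = 32, reserved_operations = 2,
 * s_head = 10, s_last = 4, and one reserved entry currently in use,
 *
 *	avail = s_size - (s_head - s_last) = 32 - 6 = 26
 *	avail = avail - 1 - (reserved_operations - reserved_used)
 *	      = 26 - 1 - (2 - 1) = 24
 *
 * so 24 more non-reserved WQEs can be posted before -ENOMEM is returned.
 */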
1713
1714/**
1715 * rvt_post_one_wr - post one RC, UC, or UD send work request
1716 * @qp: the QP to post on
1717 * @wr: the work request to send
1718 */
1719static int rvt_post_one_wr(struct rvt_qp *qp,
1720                           const struct ib_send_wr *wr,
1721                           int *call_send)
1722{
1723        struct rvt_swqe *wqe;
1724        u32 next;
1725        int i;
1726        int j;
1727        int acc;
1728        struct rvt_lkey_table *rkt;
1729        struct rvt_pd *pd;
1730        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
1731        u8 log_pmtu;
1732        int ret;
1733        size_t cplen;
1734        bool reserved_op;
1735        int local_ops_delayed = 0;
1736
1737        BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE));
1738
1739        /* IB spec says that num_sge == 0 is OK. */
1740        if (unlikely(wr->num_sge > qp->s_max_sge))
1741                return -EINVAL;
1742
1743        ret = rvt_qp_valid_operation(qp, rdi->post_parms, wr);
1744        if (ret < 0)
1745                return ret;
1746        cplen = ret;
1747
1748        /*
1749         * Local operations include fast register and local invalidate.
1750         * Fast register needs to be processed immediately because the
1751         * registered lkey may be used by following work requests and the
1752         * lkey needs to be valid at the time those requests are posted.
1753         * Local invalidate can be processed immediately if fencing is
1754         * not required and no previous local invalidate ops are pending.
1755         * Signaled local operations that have been processed immediately
1756         * need to have requests with "completion only" flags set posted
1757         * to the send queue in order to generate completions.
1758         */
1759        if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) {
1760                switch (wr->opcode) {
1761                case IB_WR_REG_MR:
1762                        ret = rvt_fast_reg_mr(qp,
1763                                              reg_wr(wr)->mr,
1764                                              reg_wr(wr)->key,
1765                                              reg_wr(wr)->access);
1766                        if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
1767                                return ret;
1768                        break;
1769                case IB_WR_LOCAL_INV:
1770                        if ((wr->send_flags & IB_SEND_FENCE) ||
1771                            atomic_read(&qp->local_ops_pending)) {
1772                                local_ops_delayed = 1;
1773                        } else {
1774                                ret = rvt_invalidate_rkey(
1775                                        qp, wr->ex.invalidate_rkey);
1776                                if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
1777                                        return ret;
1778                        }
1779                        break;
1780                default:
1781                        return -EINVAL;
1782                }
1783        }
1784
1785        reserved_op = rdi->post_parms[wr->opcode].flags &
1786                        RVT_OPERATION_USE_RESERVE;
1787        /* check for avail */
1788        ret = rvt_qp_is_avail(qp, rdi, reserved_op);
1789        if (ret)
1790                return ret;
1791        next = qp->s_head + 1;
1792        if (next >= qp->s_size)
1793                next = 0;
1794
1795        rkt = &rdi->lkey_table;
1796        pd = ibpd_to_rvtpd(qp->ibqp.pd);
1797        wqe = rvt_get_swqe_ptr(qp, qp->s_head);
1798
1799        /* cplen has length from above */
1800        memcpy(&wqe->wr, wr, cplen);
1801
1802        wqe->length = 0;
1803        j = 0;
1804        if (wr->num_sge) {
1805                struct rvt_sge *last_sge = NULL;
1806
1807                acc = wr->opcode >= IB_WR_RDMA_READ ?
1808                        IB_ACCESS_LOCAL_WRITE : 0;
1809                for (i = 0; i < wr->num_sge; i++) {
1810                        u32 length = wr->sg_list[i].length;
1811
1812                        if (length == 0)
1813                                continue;
1814                        ret = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge,
1815                                          &wr->sg_list[i], acc);
1816                        if (unlikely(ret < 0))
1817                                goto bail_inval_free;
1818                        wqe->length += length;
1819                        if (ret)
1820                                last_sge = &wqe->sg_list[j];
1821                        j += ret;
1822                }
1823                wqe->wr.num_sge = j;
1824        }
1825
1826        /* general part of wqe valid - allow for driver checks */
1827        if (rdi->driver_f.check_send_wqe) {
1828                ret = rdi->driver_f.check_send_wqe(qp, wqe);
1829                if (ret < 0)
1830                        goto bail_inval_free;
1831                if (ret)
1832                        *call_send = ret;
1833        }
1834
1835        log_pmtu = qp->log_pmtu;
1836        if (qp->ibqp.qp_type != IB_QPT_UC &&
1837            qp->ibqp.qp_type != IB_QPT_RC) {
1838                struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
1839
1840                log_pmtu = ah->log_pmtu;
1841                atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
1842        }
1843
1844        if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) {
1845                if (local_ops_delayed)
1846                        atomic_inc(&qp->local_ops_pending);
1847                else
1848                        wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY;
1849                wqe->ssn = 0;
1850                wqe->psn = 0;
1851                wqe->lpsn = 0;
1852        } else {
1853                wqe->ssn = qp->s_ssn++;
1854                wqe->psn = qp->s_next_psn;
1855                wqe->lpsn = wqe->psn +
1856                                (wqe->length ?
1857                                        ((wqe->length - 1) >> log_pmtu) :
1858                                        0);
1859                qp->s_next_psn = wqe->lpsn + 1;
1860        }
1861        if (unlikely(reserved_op)) {
1862                wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
1863                rvt_qp_wqe_reserve(qp, wqe);
1864        } else {
1865                wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
1866                qp->s_avail--;
1867        }
1868        trace_rvt_post_one_wr(qp, wqe, wr->num_sge);
1869        smp_wmb(); /* see request builders */
1870        qp->s_head = next;
1871
1872        return 0;
1873
1874bail_inval_free:
1875        /* release mr holds */
1876        while (j) {
1877                struct rvt_sge *sge = &wqe->sg_list[--j];
1878
1879                rvt_put_mr(sge->mr);
1880        }
1881        return ret;
1882}
1883
1884/**
1885 * rvt_post_send - post a send on a QP
1886 * @ibqp: the QP to post the send on
1887 * @wr: the list of work requests to post
1888 * @bad_wr: the first bad WR is put here
1889 *
1890 * This may be called from interrupt context.
1891 *
1892 * Return: 0 on success else errno
1893 */
1894int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1895                  const struct ib_send_wr **bad_wr)
1896{
1897        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1898        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1899        unsigned long flags = 0;
1900        int call_send;
1901        unsigned nreq = 0;
1902        int err = 0;
1903
1904        spin_lock_irqsave(&qp->s_hlock, flags);
1905
1906        /*
1907         * Ensure QP state is such that we can send. If not bail out early,
1908         * there is no need to do this every time we post a send.
1909         */
1910        if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
1911                spin_unlock_irqrestore(&qp->s_hlock, flags);
1912                return -EINVAL;
1913        }
1914
1915        /*
1916         * If the send queue is empty, and we only have a single WR then just go
1917         * ahead and kick the send engine into gear. Otherwise we will always
1918         * just schedule the send to happen later.
1919         */
1920        call_send = qp->s_head == READ_ONCE(qp->s_last) && !wr->next;
1921
1922        for (; wr; wr = wr->next) {
1923                err = rvt_post_one_wr(qp, wr, &call_send);
1924                if (unlikely(err)) {
1925                        *bad_wr = wr;
1926                        goto bail;
1927                }
1928                nreq++;
1929        }
1930bail:
1931        spin_unlock_irqrestore(&qp->s_hlock, flags);
1932        if (nreq) {
1933                if (call_send)
1934                        rdi->driver_f.do_send(qp);
1935                else
1936                        rdi->driver_f.schedule_send_no_lock(qp);
1937        }
1938        return err;
1939}
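
/*
 * Illustrative sketch (not part of this file): rvt_post_send() is reached
 * via ib_post_send() from the ULP.  A single signaled SEND with one SGE,
 * with dma_addr/len/mr_lkey/ctx standing in for caller-provided values,
 * might look like:
 *
 *	struct ib_sge sge = {
 *		.addr   = dma_addr,
 *		.length = len,
 *		.lkey   = mr_lkey,
 *	};
 *	struct ib_send_wr wr = {
 *		.wr_id      = (u64)(uintptr_t)ctx,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *		.opcode     = IB_WR_SEND,
 *		.send_flags = IB_SEND_SIGNALED,
 *	};
 *	const struct ib_send_wr *bad_wr;
 *	int ret = ib_post_send(qp, &wr, &bad_wr);
 *
 * Because the send queue was empty and only one WR was posted, call_send
 * is true above and the driver's do_send() is invoked directly instead of
 * deferring to schedule_send_no_lock().
 */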
1940
1941/**
1942 * rvt_post_srq_recv - post a receive on a shared receive queue
1943 * @ibsrq: the SRQ to post the receive on
1944 * @wr: the list of work requests to post
1945 * @bad_wr: A pointer to the first WR to cause a problem is put here
1946 *
1947 * This may be called from interrupt context.
1948 *
1949 * Return: 0 on success else errno
1950 */
1951int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
1952                      const struct ib_recv_wr **bad_wr)
1953{
1954        struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
1955        struct rvt_rwq *wq;
1956        unsigned long flags;
1957
1958        for (; wr; wr = wr->next) {
1959                struct rvt_rwqe *wqe;
1960                u32 next;
1961                int i;
1962
1963                if ((unsigned)wr->num_sge > srq->rq.max_sge) {
1964                        *bad_wr = wr;
1965                        return -EINVAL;
1966                }
1967
1968                spin_lock_irqsave(&srq->rq.lock, flags);
1969                wq = srq->rq.wq;
1970                next = wq->head + 1;
1971                if (next >= srq->rq.size)
1972                        next = 0;
1973                if (next == wq->tail) {
1974                        spin_unlock_irqrestore(&srq->rq.lock, flags);
1975                        *bad_wr = wr;
1976                        return -ENOMEM;
1977                }
1978
1979                wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
1980                wqe->wr_id = wr->wr_id;
1981                wqe->num_sge = wr->num_sge;
1982                for (i = 0; i < wr->num_sge; i++)
1983                        wqe->sg_list[i] = wr->sg_list[i];
1984                /* Make sure queue entry is written before the head index. */
1985                smp_wmb();
1986                wq->head = next;
1987                spin_unlock_irqrestore(&srq->rq.lock, flags);
1988        }
1989        return 0;
1990}
1991
1992/*
1993 * Validate a RWQE and fill in the SGE state.
1994 * Return 1 if OK.
1995 */
1996static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
1997{
1998        int i, j, ret;
1999        struct ib_wc wc;
2000        struct rvt_lkey_table *rkt;
2001        struct rvt_pd *pd;
2002        struct rvt_sge_state *ss;
2003        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2004
2005        rkt = &rdi->lkey_table;
2006        pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
2007        ss = &qp->r_sge;
2008        ss->sg_list = qp->r_sg_list;
2009        qp->r_len = 0;
2010        for (i = j = 0; i < wqe->num_sge; i++) {
2011                if (wqe->sg_list[i].length == 0)
2012                        continue;
2013                /* Check LKEY */
2014                ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
2015                                  NULL, &wqe->sg_list[i],
2016                                  IB_ACCESS_LOCAL_WRITE);
2017                if (unlikely(ret <= 0))
2018                        goto bad_lkey;
2019                qp->r_len += wqe->sg_list[i].length;
2020                j++;
2021        }
2022        ss->num_sge = j;
2023        ss->total_len = qp->r_len;
2024        return 1;
2025
2026bad_lkey:
2027        while (j) {
2028                struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
2029
2030                rvt_put_mr(sge->mr);
2031        }
2032        ss->num_sge = 0;
2033        memset(&wc, 0, sizeof(wc));
2034        wc.wr_id = wqe->wr_id;
2035        wc.status = IB_WC_LOC_PROT_ERR;
2036        wc.opcode = IB_WC_RECV;
2037        wc.qp = &qp->ibqp;
2038        /* Signal solicited completion event. */
2039        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
2040        return 0;
2041}
2042
2043/**
2044 * rvt_get_rwqe - copy the next RWQE into the QP's RWQE
2045 * @qp: the QP
2046 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
2047 *
2048 * Return -1 if there is a local error, 0 if no RWQE is available,
2049 * otherwise return 1.
2050 *
2051 * Can be called from interrupt level.
2052 */
2053int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only)
2054{
2055        unsigned long flags;
2056        struct rvt_rq *rq;
2057        struct rvt_rwq *wq;
2058        struct rvt_srq *srq;
2059        struct rvt_rwqe *wqe;
2060        void (*handler)(struct ib_event *, void *);
2061        u32 tail;
2062        int ret;
2063
2064        if (qp->ibqp.srq) {
2065                srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
2066                handler = srq->ibsrq.event_handler;
2067                rq = &srq->rq;
2068        } else {
2069                srq = NULL;
2070                handler = NULL;
2071                rq = &qp->r_rq;
2072        }
2073
2074        spin_lock_irqsave(&rq->lock, flags);
2075        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
2076                ret = 0;
2077                goto unlock;
2078        }
2079
2080        wq = rq->wq;
2081        tail = wq->tail;
2082        /* Validate tail before using it since it is user writable. */
2083        if (tail >= rq->size)
2084                tail = 0;
2085        if (unlikely(tail == wq->head)) {
2086                ret = 0;
2087                goto unlock;
2088        }
2089        /* Make sure entry is read after head index is read. */
2090        smp_rmb();
2091        wqe = rvt_get_rwqe_ptr(rq, tail);
2092        /*
2093         * Even though we update the tail index in memory, the verbs
2094         * consumer is not supposed to post more entries until a
2095         * completion is generated.
2096         */
2097        if (++tail >= rq->size)
2098                tail = 0;
2099        wq->tail = tail;
2100        if (!wr_id_only && !init_sge(qp, wqe)) {
2101                ret = -1;
2102                goto unlock;
2103        }
2104        qp->r_wr_id = wqe->wr_id;
2105
2106        ret = 1;
2107        set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
2108        if (handler) {
2109                u32 n;
2110
2111                /*
2112                 * Validate head pointer value and compute
2113                 * the number of remaining WQEs.
2114                 */
2115                n = wq->head;
2116                if (n >= rq->size)
2117                        n = 0;
2118                if (n < tail)
2119                        n += rq->size - tail;
2120                else
2121                        n -= tail;
2122                if (n < srq->limit) {
2123                        struct ib_event ev;
2124
2125                        srq->limit = 0;
2126                        spin_unlock_irqrestore(&rq->lock, flags);
2127                        ev.device = qp->ibqp.device;
2128                        ev.element.srq = qp->ibqp.srq;
2129                        ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
2130                        handler(&ev, srq->ibsrq.srq_context);
2131                        goto bail;
2132                }
2133        }
2134unlock:
2135        spin_unlock_irqrestore(&rq->lock, flags);
2136bail:
2137        return ret;
2138}
2139EXPORT_SYMBOL(rvt_get_rwqe);
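
/*
 * Illustrative sketch (not part of this file): the SRQ limit event raised
 * above fires once when the number of WQEs remaining on the SRQ drops
 * below srq->limit, after which the limit is cleared.  A ULP typically
 * refills the SRQ and re-arms the limit (16 below is just an example
 * watermark) from its event handler:
 *
 *	struct ib_srq_attr attr = { .srq_limit = 16 };
 *	int ret = ib_modify_srq(srq, &attr, IB_SRQ_LIMIT);
 */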
2140
2141/**
2142 * rvt_comm_est - handle trap with QP established
2143 * @qp: the QP
2144 */
2145void rvt_comm_est(struct rvt_qp *qp)
2146{
2147        qp->r_flags |= RVT_R_COMM_EST;
2148        if (qp->ibqp.event_handler) {
2149                struct ib_event ev;
2150
2151                ev.device = qp->ibqp.device;
2152                ev.element.qp = &qp->ibqp;
2153                ev.event = IB_EVENT_COMM_EST;
2154                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
2155        }
2156}
2157EXPORT_SYMBOL(rvt_comm_est);
2158
2159void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
2160{
2161        unsigned long flags;
2162        int lastwqe;
2163
2164        spin_lock_irqsave(&qp->s_lock, flags);
2165        lastwqe = rvt_error_qp(qp, err);
2166        spin_unlock_irqrestore(&qp->s_lock, flags);
2167
2168        if (lastwqe) {
2169                struct ib_event ev;
2170
2171                ev.device = qp->ibqp.device;
2172                ev.element.qp = &qp->ibqp;
2173                ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
2174                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
2175        }
2176}
2177EXPORT_SYMBOL(rvt_rc_error);
2178
2179/*
2180 *  rvt_rnr_tbl_to_usec - convert an RNR timeout index to microseconds
2181 *  @index - the index into ib_rvt_rnr_table
2182 *  return usec from an index into ib_rvt_rnr_table
2183 */
2184unsigned long rvt_rnr_tbl_to_usec(u32 index)
2185{
2186        return ib_rvt_rnr_table[(index & IB_AETH_CREDIT_MASK)];
2187}
2188EXPORT_SYMBOL(rvt_rnr_tbl_to_usec);
2189
2190static inline unsigned long rvt_aeth_to_usec(u32 aeth)
2191{
2192        return ib_rvt_rnr_table[(aeth >> IB_AETH_CREDIT_SHIFT) &
2193                                  IB_AETH_CREDIT_MASK];
2194}
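
/*
 * Worked example (values are illustrative): an AETH whose credit field
 * carries RNR timer code 0x0C decodes as
 *
 *	(aeth >> IB_AETH_CREDIT_SHIFT) & IB_AETH_CREDIT_MASK = 0x0C
 *	ib_rvt_rnr_table[0x0C] = 640 usec (0.64 ms)
 *
 * which rvt_add_rnr_timer() below converts to nanoseconds for the
 * hrtimer.
 */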
2195
2196/*
2197 *  rvt_add_retry_timer - add/start a retry timer
2198 *  @qp - the QP
2199 *  add a retry timer on the QP
2200 */
2201void rvt_add_retry_timer(struct rvt_qp *qp)
2202{
2203        struct ib_qp *ibqp = &qp->ibqp;
2204        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
2205
2206        lockdep_assert_held(&qp->s_lock);
2207        qp->s_flags |= RVT_S_TIMER;
2208        /* 4.096 usec. * (1 << qp->timeout) */
2209        qp->s_timer.expires = jiffies + qp->timeout_jiffies +
2210                             rdi->busy_jiffies;
2211        add_timer(&qp->s_timer);
2212}
2213EXPORT_SYMBOL(rvt_add_retry_timer);
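
/*
 * Worked example for the expiry above (illustrative): qp->timeout_jiffies
 * is derived from the IB Local ACK Timeout as 4.096 usec * 2^timeout, so
 * a timeout value of 14 gives roughly
 *
 *	4.096 usec * (1 << 14) = 67108.864 usec ~= 67 ms
 *
 * converted to jiffies, plus rdi->busy_jiffies, which scales the expiry
 * with the number of RC QPs on the device.
 */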
2214
2215/**
2216 * rvt_add_rnr_timer - add/start an rnr timer
2217 * @qp - the QP
2218 * @aeth - aeth of RNR timeout, simulated aeth for loopback
2219 * add an rnr timer on the QP
2220 */
2221void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
2222{
2223        u32 to;
2224
2225        lockdep_assert_held(&qp->s_lock);
2226        qp->s_flags |= RVT_S_WAIT_RNR;
2227        to = rvt_aeth_to_usec(aeth);
2228        trace_rvt_rnrnak_add(qp, to);
2229        hrtimer_start(&qp->s_rnr_timer,
2230                      ns_to_ktime(1000 * to), HRTIMER_MODE_REL_PINNED);
2231}
2232EXPORT_SYMBOL(rvt_add_rnr_timer);
2233
2234/**
2235 * rvt_stop_rc_timers - stop all timers
2236 * @qp - the QP
2237 * stop any pending timers
2238 */
2239void rvt_stop_rc_timers(struct rvt_qp *qp)
2240{
2241        lockdep_assert_held(&qp->s_lock);
2242        /* Remove QP from all timers */
2243        if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
2244                qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
2245                del_timer(&qp->s_timer);
2246                hrtimer_try_to_cancel(&qp->s_rnr_timer);
2247        }
2248}
2249EXPORT_SYMBOL(rvt_stop_rc_timers);
2250
2251/**
2252 * rvt_stop_rnr_timer - stop an rnr timer
2253 * @qp - the QP
2254 *
2255 * stop an rnr timer and return if the timer
2256 * had been pending.
2257 */
2258static void rvt_stop_rnr_timer(struct rvt_qp *qp)
2259{
2260        lockdep_assert_held(&qp->s_lock);
2261        /* Remove QP from rnr timer */
2262        if (qp->s_flags & RVT_S_WAIT_RNR) {
2263                qp->s_flags &= ~RVT_S_WAIT_RNR;
2264                trace_rvt_rnrnak_stop(qp, 0);
2265        }
2266}
2267
2268/**
2269 * rvt_del_timers_sync - wait for any timeout routines to exit
2270 * @qp - the QP
2271 */
2272void rvt_del_timers_sync(struct rvt_qp *qp)
2273{
2274        del_timer_sync(&qp->s_timer);
2275        hrtimer_cancel(&qp->s_rnr_timer);
2276}
2277EXPORT_SYMBOL(rvt_del_timers_sync);
2278
2279/**
2280 * rvt_rc_timeout - called from s_timer for missing responses
2281 */
2282static void rvt_rc_timeout(struct timer_list *t)
2283{
2284        struct rvt_qp *qp = from_timer(qp, t, s_timer);
2285        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2286        unsigned long flags;
2287
2288        spin_lock_irqsave(&qp->r_lock, flags);
2289        spin_lock(&qp->s_lock);
2290        if (qp->s_flags & RVT_S_TIMER) {
2291                struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
2292
2293                qp->s_flags &= ~RVT_S_TIMER;
2294                rvp->n_rc_timeouts++;
2295                del_timer(&qp->s_timer);
2296                trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
2297                if (rdi->driver_f.notify_restart_rc)
2298                        rdi->driver_f.notify_restart_rc(qp,
2299                                                        qp->s_last_psn + 1,
2300                                                        1);
2301                rdi->driver_f.schedule_send(qp);
2302        }
2303        spin_unlock(&qp->s_lock);
2304        spin_unlock_irqrestore(&qp->r_lock, flags);
2305}
2306
2307/*
2308 * This is called from s_timer for RNR timeouts.
2309 */
2310enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t)
2311{
2312        struct rvt_qp *qp = container_of(t, struct rvt_qp, s_rnr_timer);
2313        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2314        unsigned long flags;
2315
2316        spin_lock_irqsave(&qp->s_lock, flags);
2317        rvt_stop_rnr_timer(qp);
2318        trace_rvt_rnrnak_timeout(qp, 0);
2319        rdi->driver_f.schedule_send(qp);
2320        spin_unlock_irqrestore(&qp->s_lock, flags);
2321        return HRTIMER_NORESTART;
2322}
2323EXPORT_SYMBOL(rvt_rc_rnr_retry);
2324
2325/**
2326 * rvt_qp_iter_init - initialize a QP iterator
2327 * @rdi: rvt devinfo
2328 * @v: u64 value passed to @cb
2329 *
2330 * This returns an iterator suitable for iterating QPs
2331 * in the system.
2332 *
2333 * The @cb is a user defined callback and @v is a 64
2334 * bit value passed to and relevant for processing in the
2335 * @cb.  An example use case would be to alter QP processing
2336 * based on criteria not part of the rvt_qp.
2337 *
2338 * Use cases that require memory allocation to succeed
2339 * must preallocate appropriately.
2340 *
2341 * Return: a pointer to an rvt_qp_iter or NULL
2342 */
2343struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
2344                                     u64 v,
2345                                     void (*cb)(struct rvt_qp *qp, u64 v))
2346{
2347        struct rvt_qp_iter *i;
2348
2349        i = kzalloc(sizeof(*i), GFP_KERNEL);
2350        if (!i)
2351                return NULL;
2352
2353        i->rdi = rdi;
2354        /* number of special QPs (SMI/GSI) for device */
2355        i->specials = rdi->ibdev.phys_port_cnt * 2;
2356        i->v = v;
2357        i->cb = cb;
2358
2359        return i;
2360}
2361EXPORT_SYMBOL(rvt_qp_iter_init);
2362
2363/**
2364 * rvt_qp_iter_next - return the next QP in iter
2365 * @iter - the iterator
2366 *
2367 * Fine grained QP iterator suitable for use
2368 * with debugfs seq_file mechanisms.
2369 *
2370 * Updates iter->qp with the current QP when the return
2371 * value is 0.
2372 *
2373 * Return: 0 - iter->qp is valid 1 - no more QPs
2374 */
2375int rvt_qp_iter_next(struct rvt_qp_iter *iter)
2376        __must_hold(RCU)
2377{
2378        int n = iter->n;
2379        int ret = 1;
2380        struct rvt_qp *pqp = iter->qp;
2381        struct rvt_qp *qp;
2382        struct rvt_dev_info *rdi = iter->rdi;
2383
2384        /*
2385         * The approach is to consider the special qps
2386         * as additional table entries before the
2387         * real hash table.  Since the qp code sets
2388         * the qp->next hash link to NULL, this works just fine.
2389         *
2390         * iter->specials is 2 * # ports
2391         *
2392         * n = 0..iter->specials is the special qp indices
2393         *
2394         * n = iter->specials..rdi->qp_dev->qp_table_size+iter->specials are
2395         * the potential hash bucket entries
2396         *
2397         */
2398        for (; n <  rdi->qp_dev->qp_table_size + iter->specials; n++) {
2399                if (pqp) {
2400                        qp = rcu_dereference(pqp->next);
2401                } else {
2402                        if (n < iter->specials) {
2403                                struct rvt_ibport *rvp;
2404                                int pidx;
2405
2406                                pidx = n % rdi->ibdev.phys_port_cnt;
2407                                rvp = rdi->ports[pidx];
2408                                qp = rcu_dereference(rvp->qp[n & 1]);
2409                        } else {
2410                                qp = rcu_dereference(
2411                                        rdi->qp_dev->qp_table[
2412                                                (n - iter->specials)]);
2413                        }
2414                }
2415                pqp = qp;
2416                if (qp) {
2417                        iter->qp = qp;
2418                        iter->n = n;
2419                        return 0;
2420                }
2421        }
2422        return ret;
2423}
2424EXPORT_SYMBOL(rvt_qp_iter_next);
2425
2426/**
2427 * rvt_qp_iter - iterate all QPs
2428 * @rdi - rvt devinfo
2429 * @v - a 64 bit value
2430 * @cb - a callback
2431 *
2432 * This provides a way for iterating all QPs.
2433 *
2434 * The @cb is a user defined callback and @v is a 64
2435 * bit value passed to and relevant for processing in the
2436 * cb.  An example use case would be to alter QP processing
2437 * based on criteria not part of the rvt_qp.
2438 *
2439 * The code has an internal iterator to simplify
2440 * non seq_file use cases.
2441 */
2442void rvt_qp_iter(struct rvt_dev_info *rdi,
2443                 u64 v,
2444                 void (*cb)(struct rvt_qp *qp, u64 v))
2445{
2446        int ret;
2447        struct rvt_qp_iter i = {
2448                .rdi = rdi,
2449                .specials = rdi->ibdev.phys_port_cnt * 2,
2450                .v = v,
2451                .cb = cb
2452        };
2453
2454        rcu_read_lock();
2455        do {
2456                ret = rvt_qp_iter_next(&i);
2457                if (!ret) {
2458                        rvt_get_qp(i.qp);
2459                        rcu_read_unlock();
2460                        i.cb(i.qp, i.v);
2461                        rcu_read_lock();
2462                        rvt_put_qp(i.qp);
2463                }
2464        } while (!ret);
2465        rcu_read_unlock();
2466}
2467EXPORT_SYMBOL(rvt_qp_iter);
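
/*
 * Illustrative sketch (not part of this file): a hypothetical caller of
 * rvt_qp_iter() that counts RC QPs on a device.  The callback name and
 * the use of @v as a pointer-sized cookie are examples only.
 *
 *	static void count_rc_qp(struct rvt_qp *qp, u64 v)
 *	{
 *		atomic_t *count = (atomic_t *)(uintptr_t)v;
 *
 *		if (qp->ibqp.qp_type == IB_QPT_RC)
 *			atomic_inc(count);
 *	}
 *
 * and, in the caller:
 *
 *	atomic_t rc_count = ATOMIC_INIT(0);
 *
 *	rvt_qp_iter(rdi, (u64)(uintptr_t)&rc_count, count_rc_qp);
 *
 * Each QP is held with rvt_get_qp()/rvt_put_qp() around the callback and
 * the RCU read lock is dropped, so the callback runs outside the RCU
 * read-side critical section.
 */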
2468