linux/drivers/infiniband/sw/rdmavt/qp.c
   1/*
   2 * Copyright(c) 2016, 2017 Intel Corporation.
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of version 2 of the GNU General Public License as
  11 * published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but
  14 * WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * General Public License for more details.
  17 *
  18 * BSD LICENSE
  19 *
  20 * Redistribution and use in source and binary forms, with or without
  21 * modification, are permitted provided that the following conditions
  22 * are met:
  23 *
  24 *  - Redistributions of source code must retain the above copyright
  25 *    notice, this list of conditions and the following disclaimer.
  26 *  - Redistributions in binary form must reproduce the above copyright
  27 *    notice, this list of conditions and the following disclaimer in
  28 *    the documentation and/or other materials provided with the
  29 *    distribution.
  30 *  - Neither the name of Intel Corporation nor the names of its
  31 *    contributors may be used to endorse or promote products derived
  32 *    from this software without specific prior written permission.
  33 *
  34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45 *
  46 */
  47
  48#include <linux/hash.h>
  49#include <linux/bitops.h>
  50#include <linux/lockdep.h>
  51#include <linux/vmalloc.h>
  52#include <linux/slab.h>
  53#include <rdma/ib_verbs.h>
  54#include <rdma/ib_hdrs.h>
  55#include <rdma/opa_addr.h>
  56#include "qp.h"
  57#include "vt.h"
  58#include "trace.h"
  59
  60static void rvt_rc_timeout(unsigned long arg);
  61
  62/*
  63 * Convert the AETH RNR timeout code into the number of microseconds.
  64 */
  65static const u32 ib_rvt_rnr_table[32] = {
  66        655360, /* 00: 655.36 */
  67        10,     /* 01:    .01 */
   68        20,     /* 02:    .02 */
  69        30,     /* 03:    .03 */
  70        40,     /* 04:    .04 */
  71        60,     /* 05:    .06 */
  72        80,     /* 06:    .08 */
  73        120,    /* 07:    .12 */
  74        160,    /* 08:    .16 */
  75        240,    /* 09:    .24 */
  76        320,    /* 0A:    .32 */
  77        480,    /* 0B:    .48 */
  78        640,    /* 0C:    .64 */
  79        960,    /* 0D:    .96 */
  80        1280,   /* 0E:   1.28 */
  81        1920,   /* 0F:   1.92 */
  82        2560,   /* 10:   2.56 */
  83        3840,   /* 11:   3.84 */
  84        5120,   /* 12:   5.12 */
  85        7680,   /* 13:   7.68 */
  86        10240,  /* 14:  10.24 */
  87        15360,  /* 15:  15.36 */
  88        20480,  /* 16:  20.48 */
  89        30720,  /* 17:  30.72 */
  90        40960,  /* 18:  40.96 */
  91        61440,  /* 19:  61.44 */
  92        81920,  /* 1A:  81.92 */
  93        122880, /* 1B: 122.88 */
  94        163840, /* 1C: 163.84 */
  95        245760, /* 1D: 245.76 */
  96        327680, /* 1E: 327.68 */
  97        491520  /* 1F: 491.52 */
  98};
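     /*
      * The table is indexed by the 5-bit RNR NAK timer code carried in
      * the AETH; e.g. code 0x0C selects the 640 entry above, i.e. a
      * 0.64 ms RNR delay before the request is retried.
      */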
  99
 100/*
 101 * Note that it is OK to post send work requests in the SQE and ERR
 102 * states; rvt_do_send() will process them and generate error
 103 * completions as per IB 1.2 C10-96.
 104 */
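     /*
      * Typical use (illustrative): index the table with the current QP
      * state and test the returned flag bits, e.g.
      *
      *        if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK))
      *                return -EINVAL;
      */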
 105const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
 106        [IB_QPS_RESET] = 0,
 107        [IB_QPS_INIT] = RVT_POST_RECV_OK,
 108        [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
 109        [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
 110            RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
 111            RVT_PROCESS_NEXT_SEND_OK,
 112        [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
 113            RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
 114        [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
 115            RVT_POST_SEND_OK | RVT_FLUSH_SEND,
 116        [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
 117            RVT_POST_SEND_OK | RVT_FLUSH_SEND,
 118};
 119EXPORT_SYMBOL(ib_rvt_state_ops);
 120
 121static void get_map_page(struct rvt_qpn_table *qpt,
 122                         struct rvt_qpn_map *map)
 123{
 124        unsigned long page = get_zeroed_page(GFP_KERNEL);
 125
 126        /*
 127         * Free the page if someone raced with us installing it.
 128         */
 129
 130        spin_lock(&qpt->lock);
 131        if (map->page)
 132                free_page(page);
 133        else
 134                map->page = (void *)page;
 135        spin_unlock(&qpt->lock);
 136}
 137
 138/**
 139 * init_qpn_table - initialize the QP number table for a device
 140 * @qpt: the QPN table
 141 */
 142static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
 143{
 144        u32 offset, i;
 145        struct rvt_qpn_map *map;
 146        int ret = 0;
 147
 148        if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start))
 149                return -EINVAL;
 150
 151        spin_lock_init(&qpt->lock);
 152
 153        qpt->last = rdi->dparms.qpn_start;
 154        qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift;
 155
 156        /*
  157         * Drivers may want some QPs beyond what we need for verbs; let them
  158         * use our qpn table so there is no need for two. Mark the bitmaps
 159         * for those. The reserved range must be *after* the range which verbs
 160         * will pick from.
 161         */
 162
 163        /* Figure out number of bit maps needed before reserved range */
 164        qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE;
 165
 166        /* This should always be zero */
 167        offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK;
 168
 169        /* Starting with the first reserved bit map */
 170        map = &qpt->map[qpt->nmaps];
 171
 172        rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n",
 173                    rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
 174        for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
 175                if (!map->page) {
 176                        get_map_page(qpt, map);
 177                        if (!map->page) {
 178                                ret = -ENOMEM;
 179                                break;
 180                        }
 181                }
 182                set_bit(offset, map->page);
 183                offset++;
 184                if (offset == RVT_BITS_PER_PAGE) {
 185                        /* next page */
 186                        qpt->nmaps++;
 187                        map++;
 188                        offset = 0;
 189                }
 190        }
 191        return ret;
 192}
 193
 194/**
 195 * free_qpn_table - free the QP number table for a device
 196 * @qpt: the QPN table
 197 */
 198static void free_qpn_table(struct rvt_qpn_table *qpt)
 199{
 200        int i;
 201
 202        for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
 203                free_page((unsigned long)qpt->map[i].page);
 204}
 205
 206/**
 207 * rvt_driver_qp_init - Init driver qp resources
  208 * @rdi: rvt dev structure
 209 *
 210 * Return: 0 on success
 211 */
 212int rvt_driver_qp_init(struct rvt_dev_info *rdi)
 213{
 214        int i;
 215        int ret = -ENOMEM;
 216
 217        if (!rdi->dparms.qp_table_size)
 218                return -EINVAL;
 219
 220        /*
 221         * If driver is not doing any QP allocation then make sure it is
 222         * providing the necessary QP functions.
 223         */
 224        if (!rdi->driver_f.free_all_qps ||
 225            !rdi->driver_f.qp_priv_alloc ||
 226            !rdi->driver_f.qp_priv_free ||
 227            !rdi->driver_f.notify_qp_reset ||
 228            !rdi->driver_f.notify_restart_rc)
 229                return -EINVAL;
 230
 231        /* allocate parent object */
 232        rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL,
 233                                   rdi->dparms.node);
 234        if (!rdi->qp_dev)
 235                return -ENOMEM;
 236
 237        /* allocate hash table */
 238        rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
 239        rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
 240        rdi->qp_dev->qp_table =
 241                kmalloc_node(rdi->qp_dev->qp_table_size *
 242                             sizeof(*rdi->qp_dev->qp_table),
 243                             GFP_KERNEL, rdi->dparms.node);
 244        if (!rdi->qp_dev->qp_table)
 245                goto no_qp_table;
 246
 247        for (i = 0; i < rdi->qp_dev->qp_table_size; i++)
 248                RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL);
 249
 250        spin_lock_init(&rdi->qp_dev->qpt_lock);
 251
 252        /* initialize qpn map */
 253        if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
 254                goto fail_table;
 255
 256        spin_lock_init(&rdi->n_qps_lock);
 257
 258        return 0;
 259
 260fail_table:
 261        kfree(rdi->qp_dev->qp_table);
 262        free_qpn_table(&rdi->qp_dev->qpn_table);
 263
 264no_qp_table:
 265        kfree(rdi->qp_dev);
 266
 267        return ret;
 268}
 269
 270/**
  271 * rvt_free_all_qps - check for QPs still in use
  272 * @rdi: rvt device info structure
 273 *
 274 * There should not be any QPs still in use.
 275 * Free memory for table.
 276 */
 277static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
 278{
 279        unsigned long flags;
 280        struct rvt_qp *qp;
 281        unsigned n, qp_inuse = 0;
 282        spinlock_t *ql; /* work around too long line below */
 283
 284        if (rdi->driver_f.free_all_qps)
 285                qp_inuse = rdi->driver_f.free_all_qps(rdi);
 286
 287        qp_inuse += rvt_mcast_tree_empty(rdi);
 288
 289        if (!rdi->qp_dev)
 290                return qp_inuse;
 291
 292        ql = &rdi->qp_dev->qpt_lock;
 293        spin_lock_irqsave(ql, flags);
 294        for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
 295                qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
 296                                               lockdep_is_held(ql));
 297                RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
 298
 299                for (; qp; qp = rcu_dereference_protected(qp->next,
 300                                                          lockdep_is_held(ql)))
 301                        qp_inuse++;
 302        }
 303        spin_unlock_irqrestore(ql, flags);
 304        synchronize_rcu();
 305        return qp_inuse;
 306}
 307
 308/**
 309 * rvt_qp_exit - clean up qps on device exit
 310 * @rdi: rvt dev structure
 311 *
 312 * Check for qp leaks and free resources.
 313 */
 314void rvt_qp_exit(struct rvt_dev_info *rdi)
 315{
 316        u32 qps_inuse = rvt_free_all_qps(rdi);
 317
 318        if (qps_inuse)
 319                rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
 320                           qps_inuse);
 321        if (!rdi->qp_dev)
 322                return;
 323
 324        kfree(rdi->qp_dev->qp_table);
 325        free_qpn_table(&rdi->qp_dev->qpn_table);
 326        kfree(rdi->qp_dev);
 327}
 328
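     /* compute the QPN encoded by bit @off of bitmap page @map */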
 329static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
 330                              struct rvt_qpn_map *map, unsigned off)
 331{
 332        return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
 333}
 334
 335/**
 336 * alloc_qpn - Allocate the next available qpn or zero/one for QP type
 337 *             IB_QPT_SMI/IB_QPT_GSI
  338 * @rdi: rvt device info structure
  339 * @qpt: queue pair number table pointer
  340 * @port_num: IB port number, 1 based, comes from core
 341 *
 342 * Return: The queue pair number
 343 */
 344static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 345                     enum ib_qp_type type, u8 port_num)
 346{
 347        u32 i, offset, max_scan, qpn;
 348        struct rvt_qpn_map *map;
 349        u32 ret;
 350
 351        if (rdi->driver_f.alloc_qpn)
 352                return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num);
 353
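             /*
              * Special QPs: QPN 0 (SMI) and QPN 1 (GSI) are fixed per port;
              * qpt->flags tracks, two bits per port, which of them have
              * already been handed out.
              */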
 354        if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 355                unsigned n;
 356
 357                ret = type == IB_QPT_GSI;
 358                n = 1 << (ret + 2 * (port_num - 1));
 359                spin_lock(&qpt->lock);
 360                if (qpt->flags & n)
 361                        ret = -EINVAL;
 362                else
 363                        qpt->flags |= n;
 364                spin_unlock(&qpt->lock);
 365                goto bail;
 366        }
 367
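             /*
              * Regular QPs: scan the bitmap starting just past the last QPN
              * handed out, stepping by qpt->incr (scaled by the QoS shift),
              * and wrap around once RVT_QPN_MAX is reached.
              */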
 368        qpn = qpt->last + qpt->incr;
 369        if (qpn >= RVT_QPN_MAX)
 370                qpn = qpt->incr | ((qpt->last & 1) ^ 1);
 371        /* offset carries bit 0 */
 372        offset = qpn & RVT_BITS_PER_PAGE_MASK;
 373        map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
 374        max_scan = qpt->nmaps - !offset;
 375        for (i = 0;;) {
 376                if (unlikely(!map->page)) {
 377                        get_map_page(qpt, map);
 378                        if (unlikely(!map->page))
 379                                break;
 380                }
 381                do {
 382                        if (!test_and_set_bit(offset, map->page)) {
 383                                qpt->last = qpn;
 384                                ret = qpn;
 385                                goto bail;
 386                        }
 387                        offset += qpt->incr;
 388                        /*
  389                         * This qpn might be bogus if offset >= RVT_BITS_PER_PAGE.
  390                         * That is OK.  It gets reassigned below.
 391                         */
 392                        qpn = mk_qpn(qpt, map, offset);
 393                } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
 394                /*
 395                 * In order to keep the number of pages allocated to a
  396         * minimum, we scan all the existing pages before increasing
 397                 * the size of the bitmap table.
 398                 */
 399                if (++i > max_scan) {
 400                        if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
 401                                break;
 402                        map = &qpt->map[qpt->nmaps++];
 403                        /* start at incr with current bit 0 */
 404                        offset = qpt->incr | (offset & 1);
 405                } else if (map < &qpt->map[qpt->nmaps]) {
 406                        ++map;
 407                        /* start at incr with current bit 0 */
 408                        offset = qpt->incr | (offset & 1);
 409                } else {
 410                        map = &qpt->map[0];
 411                        /* wrap to first map page, invert bit 0 */
 412                        offset = qpt->incr | ((offset & 1) ^ 1);
 413                }
 414                /* there can be no set bits in low-order QoS bits */
 415                WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1));
 416                qpn = mk_qpn(qpt, map, offset);
 417        }
 418
 419        ret = -ENOMEM;
 420
 421bail:
 422        return ret;
 423}
 424
 425/**
  426 * rvt_clear_mr_refs - Drop held mr refs
  427 * @qp: rvt qp data structure
  428 * @clr_sends: If true, also clear the send side
 429 */
 430static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
 431{
 432        unsigned n;
 433        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
 434
 435        if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
 436                rvt_put_ss(&qp->s_rdma_read_sge);
 437
 438        rvt_put_ss(&qp->r_sge);
 439
 440        if (clr_sends) {
 441                while (qp->s_last != qp->s_head) {
 442                        struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 443
 444                        rvt_put_swqe(wqe);
 445
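                             /* UD-family WQEs hold an AH reference; drop it */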
 446                        if (qp->ibqp.qp_type == IB_QPT_UD ||
 447                            qp->ibqp.qp_type == IB_QPT_SMI ||
 448                            qp->ibqp.qp_type == IB_QPT_GSI)
 449                                atomic_dec(&ibah_to_rvtah(
 450                                                wqe->ud_wr.ah)->refcount);
 451                        if (++qp->s_last >= qp->s_size)
 452                                qp->s_last = 0;
 453                        smp_wmb(); /* see qp_set_savail */
 454                }
 455                if (qp->s_rdma_mr) {
 456                        rvt_put_mr(qp->s_rdma_mr);
 457                        qp->s_rdma_mr = NULL;
 458                }
 459        }
 460
 461        for (n = 0; qp->s_ack_queue && n < rvt_max_atomic(rdi); n++) {
 462                struct rvt_ack_entry *e = &qp->s_ack_queue[n];
 463
 464                if (e->rdma_sge.mr) {
 465                        rvt_put_mr(e->rdma_sge.mr);
 466                        e->rdma_sge.mr = NULL;
 467                }
 468        }
 469}
 470
 471/**
 472 * rvt_swqe_has_lkey - return true if lkey is used by swqe
 473 * @wqe - the send wqe
 474 * @lkey - the lkey
 475 *
 476 * Test the swqe for using lkey
 477 */
 478static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey)
 479{
 480        int i;
 481
 482        for (i = 0; i < wqe->wr.num_sge; i++) {
 483                struct rvt_sge *sge = &wqe->sg_list[i];
 484
 485                if (rvt_mr_has_lkey(sge->mr, lkey))
 486                        return true;
 487        }
 488        return false;
 489}
 490
 491/**
  492 * rvt_qp_sends_has_lkey - return true if qp sends use lkey
 493 * @qp - the rvt_qp
 494 * @lkey - the lkey
 495 */
 496static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey)
 497{
 498        u32 s_last = qp->s_last;
 499
 500        while (s_last != qp->s_head) {
 501                struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, s_last);
 502
 503                if (rvt_swqe_has_lkey(wqe, lkey))
 504                        return true;
 505
 506                if (++s_last >= qp->s_size)
 507                        s_last = 0;
 508        }
 509        if (qp->s_rdma_mr)
 510                if (rvt_mr_has_lkey(qp->s_rdma_mr, lkey))
 511                        return true;
 512        return false;
 513}
 514
 515/**
 516 * rvt_qp_acks_has_lkey - return true if acks have lkey
 517 * @qp - the qp
 518 * @lkey - the lkey
 519 */
 520static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey)
 521{
 522        int i;
 523        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
 524
 525        for (i = 0; qp->s_ack_queue && i < rvt_max_atomic(rdi); i++) {
 526                struct rvt_ack_entry *e = &qp->s_ack_queue[i];
 527
 528                if (rvt_mr_has_lkey(e->rdma_sge.mr, lkey))
 529                        return true;
 530        }
 531        return false;
 532}
 533
 534/*
 535 * rvt_qp_mr_clean - clean up remote ops for lkey
 536 * @qp - the qp
 537 * @lkey - the lkey that is being de-registered
 538 *
 539 * This routine checks if the lkey is being used by
 540 * the qp.
 541 *
  542 * If so, the qp is put into an error state to eliminate
 543 * any references from the qp.
 544 */
 545void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey)
 546{
 547        bool lastwqe = false;
 548
 549        if (qp->ibqp.qp_type == IB_QPT_SMI ||
 550            qp->ibqp.qp_type == IB_QPT_GSI)
 551                /* avoid special QPs */
 552                return;
 553        spin_lock_irq(&qp->r_lock);
 554        spin_lock(&qp->s_hlock);
 555        spin_lock(&qp->s_lock);
 556
 557        if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
 558                goto check_lwqe;
 559
 560        if (rvt_ss_has_lkey(&qp->r_sge, lkey) ||
 561            rvt_qp_sends_has_lkey(qp, lkey) ||
 562            rvt_qp_acks_has_lkey(qp, lkey))
 563                lastwqe = rvt_error_qp(qp, IB_WC_LOC_PROT_ERR);
 564check_lwqe:
 565        spin_unlock(&qp->s_lock);
 566        spin_unlock(&qp->s_hlock);
 567        spin_unlock_irq(&qp->r_lock);
 568        if (lastwqe) {
 569                struct ib_event ev;
 570
 571                ev.device = qp->ibqp.device;
 572                ev.element.qp = &qp->ibqp;
 573                ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
 574                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
 575        }
 576}
 577
 578/**
  579 * rvt_remove_qp - remove qp from table
 580 * @rdi: rvt dev struct
 581 * @qp: qp to remove
 582 *
 583 * Remove the QP from the table so it can't be found asynchronously by
 584 * the receive routine.
 585 */
 586static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 587{
 588        struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
 589        u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
 590        unsigned long flags;
 591        int removed = 1;
 592
 593        spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
 594
 595        if (rcu_dereference_protected(rvp->qp[0],
 596                        lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
 597                RCU_INIT_POINTER(rvp->qp[0], NULL);
 598        } else if (rcu_dereference_protected(rvp->qp[1],
 599                        lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
 600                RCU_INIT_POINTER(rvp->qp[1], NULL);
 601        } else {
 602                struct rvt_qp *q;
 603                struct rvt_qp __rcu **qpp;
 604
 605                removed = 0;
 606                qpp = &rdi->qp_dev->qp_table[n];
 607                for (; (q = rcu_dereference_protected(*qpp,
 608                        lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL;
 609                        qpp = &q->next) {
 610                        if (q == qp) {
 611                                RCU_INIT_POINTER(*qpp,
 612                                     rcu_dereference_protected(qp->next,
 613                                     lockdep_is_held(&rdi->qp_dev->qpt_lock)));
 614                                removed = 1;
 615                                trace_rvt_qpremove(qp, n);
 616                                break;
 617                        }
 618                }
 619        }
 620
 621        spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
 622        if (removed) {
 623                synchronize_rcu();
 624                rvt_put_qp(qp);
 625        }
 626}
 627
 628/**
 629 * rvt_init_qp - initialize the QP state to the reset state
 630 * @qp: the QP to init or reinit
 631 * @type: the QP type
 632 *
 633 * This function is called from both rvt_create_qp() and
  634 * rvt_reset_qp().  The difference is that the reset path
  635 * holds the necessary locks to protect against concurrent
  636 * access.
 637 */
 638static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 639                        enum ib_qp_type type)
 640{
 641        qp->remote_qpn = 0;
 642        qp->qkey = 0;
 643        qp->qp_access_flags = 0;
 644        qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
 645        qp->s_hdrwords = 0;
 646        qp->s_wqe = NULL;
 647        qp->s_draining = 0;
 648        qp->s_next_psn = 0;
 649        qp->s_last_psn = 0;
 650        qp->s_sending_psn = 0;
 651        qp->s_sending_hpsn = 0;
 652        qp->s_psn = 0;
 653        qp->r_psn = 0;
 654        qp->r_msn = 0;
 655        if (type == IB_QPT_RC) {
 656                qp->s_state = IB_OPCODE_RC_SEND_LAST;
 657                qp->r_state = IB_OPCODE_RC_SEND_LAST;
 658        } else {
 659                qp->s_state = IB_OPCODE_UC_SEND_LAST;
 660                qp->r_state = IB_OPCODE_UC_SEND_LAST;
 661        }
 662        qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 663        qp->r_nak_state = 0;
 664        qp->r_aflags = 0;
 665        qp->r_flags = 0;
 666        qp->s_head = 0;
 667        qp->s_tail = 0;
 668        qp->s_cur = 0;
 669        qp->s_acked = 0;
 670        qp->s_last = 0;
 671        qp->s_ssn = 1;
 672        qp->s_lsn = 0;
 673        qp->s_mig_state = IB_MIG_MIGRATED;
 674        qp->r_head_ack_queue = 0;
 675        qp->s_tail_ack_queue = 0;
 676        qp->s_num_rd_atomic = 0;
 677        if (qp->r_rq.wq) {
 678                qp->r_rq.wq->head = 0;
 679                qp->r_rq.wq->tail = 0;
 680        }
 681        qp->r_sge.num_sge = 0;
 682        atomic_set(&qp->s_reserved_used, 0);
 683}
 684
 685/**
 686 * rvt_reset_qp - initialize the QP state to the reset state
 687 * @qp: the QP to reset
 688 * @type: the QP type
 689 *
 690 * r_lock, s_hlock, and s_lock are required to be held by the caller
 691 */
 692static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 693                         enum ib_qp_type type)
 694        __must_hold(&qp->s_lock)
 695        __must_hold(&qp->s_hlock)
 696        __must_hold(&qp->r_lock)
 697{
 698        lockdep_assert_held(&qp->r_lock);
 699        lockdep_assert_held(&qp->s_hlock);
 700        lockdep_assert_held(&qp->s_lock);
 701        if (qp->state != IB_QPS_RESET) {
 702                qp->state = IB_QPS_RESET;
 703
 704                /* Let drivers flush their waitlist */
 705                rdi->driver_f.flush_qp_waiters(qp);
 706                rvt_stop_rc_timers(qp);
 707                qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
 708                spin_unlock(&qp->s_lock);
 709                spin_unlock(&qp->s_hlock);
 710                spin_unlock_irq(&qp->r_lock);
 711
 712                /* Stop the send queue and the retry timer */
 713                rdi->driver_f.stop_send_queue(qp);
 714                rvt_del_timers_sync(qp);
 715                /* Wait for things to stop */
 716                rdi->driver_f.quiesce_qp(qp);
 717
  718                /* take qp out of the hash and wait for it to be unused */
 719                rvt_remove_qp(rdi, qp);
 720                wait_event(qp->wait, !atomic_read(&qp->refcount));
 721
 722                /* grab the lock b/c it was locked at call time */
 723                spin_lock_irq(&qp->r_lock);
 724                spin_lock(&qp->s_hlock);
 725                spin_lock(&qp->s_lock);
 726
 727                rvt_clear_mr_refs(qp, 1);
 728                /*
 729                 * Let the driver do any tear down or re-init it needs to for
 730                 * a qp that has been reset
 731                 */
 732                rdi->driver_f.notify_qp_reset(qp);
 733        }
 734        rvt_init_qp(rdi, qp, type);
 735        lockdep_assert_held(&qp->r_lock);
 736        lockdep_assert_held(&qp->s_hlock);
 737        lockdep_assert_held(&qp->s_lock);
 738}
 739
 740/** rvt_free_qpn - Free a qpn from the bit map
 741 * @qpt: QP table
 742 * @qpn: queue pair number to free
 743 */
 744static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
 745{
 746        struct rvt_qpn_map *map;
 747
 748        map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE;
 749        if (map->page)
 750                clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
 751}
 752
 753/**
 754 * rvt_create_qp - create a queue pair for a device
  755 * @ibpd: the protection domain whose device we create the queue pair for
 756 * @init_attr: the attributes of the queue pair
 757 * @udata: user data for libibverbs.so
 758 *
 759 * Queue pair creation is mostly an rvt issue. However, drivers have their own
 760 * unique idea of what queue pair numbers mean. For instance there is a reserved
 761 * range for PSM.
 762 *
 763 * Return: the queue pair on success, otherwise returns an errno.
 764 *
 765 * Called by the ib_create_qp() core verbs function.
 766 */
 767struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 768                            struct ib_qp_init_attr *init_attr,
 769                            struct ib_udata *udata)
 770{
 771        struct rvt_qp *qp;
 772        int err;
 773        struct rvt_swqe *swq = NULL;
 774        size_t sz;
 775        size_t sg_list_sz;
 776        struct ib_qp *ret = ERR_PTR(-ENOMEM);
 777        struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
 778        void *priv = NULL;
 779        size_t sqsize;
 780
 781        if (!rdi)
 782                return ERR_PTR(-EINVAL);
 783
 784        if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
 785            init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
 786            init_attr->create_flags)
 787                return ERR_PTR(-EINVAL);
 788
 789        /* Check receive queue parameters if no SRQ is specified. */
 790        if (!init_attr->srq) {
 791                if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
 792                    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
 793                        return ERR_PTR(-EINVAL);
 794
 795                if (init_attr->cap.max_send_sge +
 796                    init_attr->cap.max_send_wr +
 797                    init_attr->cap.max_recv_sge +
 798                    init_attr->cap.max_recv_wr == 0)
 799                        return ERR_PTR(-EINVAL);
 800        }
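             /*
              * One extra slot keeps the ring's head from catching the tail
              * when full; reserved_operations leaves room for internal
              * driver-reserved send operations.
              */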
 801        sqsize =
 802                init_attr->cap.max_send_wr + 1 +
 803                rdi->dparms.reserved_operations;
 804        switch (init_attr->qp_type) {
 805        case IB_QPT_SMI:
 806        case IB_QPT_GSI:
 807                if (init_attr->port_num == 0 ||
 808                    init_attr->port_num > ibpd->device->phys_port_cnt)
 809                        return ERR_PTR(-EINVAL);
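                     /* fall through */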
 810        case IB_QPT_UC:
 811        case IB_QPT_RC:
 812        case IB_QPT_UD:
 813                sz = sizeof(struct rvt_sge) *
 814                        init_attr->cap.max_send_sge +
 815                        sizeof(struct rvt_swqe);
 816                swq = vzalloc_node(sqsize * sz, rdi->dparms.node);
 817                if (!swq)
 818                        return ERR_PTR(-ENOMEM);
 819
 820                sz = sizeof(*qp);
 821                sg_list_sz = 0;
 822                if (init_attr->srq) {
 823                        struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
 824
 825                        if (srq->rq.max_sge > 1)
 826                                sg_list_sz = sizeof(*qp->r_sg_list) *
 827                                        (srq->rq.max_sge - 1);
 828                } else if (init_attr->cap.max_recv_sge > 1)
 829                        sg_list_sz = sizeof(*qp->r_sg_list) *
 830                                (init_attr->cap.max_recv_sge - 1);
 831                qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL,
 832                                  rdi->dparms.node);
 833                if (!qp)
 834                        goto bail_swq;
 835
 836                RCU_INIT_POINTER(qp->next, NULL);
 837                if (init_attr->qp_type == IB_QPT_RC) {
 838                        qp->s_ack_queue =
 839                                kzalloc_node(
 840                                        sizeof(*qp->s_ack_queue) *
 841                                         rvt_max_atomic(rdi),
 842                                        GFP_KERNEL,
 843                                        rdi->dparms.node);
 844                        if (!qp->s_ack_queue)
 845                                goto bail_qp;
 846                }
 847                /* initialize timers needed for rc qp */
 848                setup_timer(&qp->s_timer, rvt_rc_timeout, (unsigned long)qp);
 849                hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC,
 850                             HRTIMER_MODE_REL);
 851                qp->s_rnr_timer.function = rvt_rc_rnr_retry;
 852
 853                /*
  854                 * Driver needs to set up its private QP structure and do any
 855                 * initialization that is needed.
 856                 */
 857                priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
 858                if (IS_ERR(priv)) {
 859                        ret = priv;
 860                        goto bail_qp;
 861                }
 862                qp->priv = priv;
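                     /* IB local ACK timeout is 4.096 usec * 2^timeout */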
 863                qp->timeout_jiffies =
 864                        usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
 865                                1000UL);
 866                if (init_attr->srq) {
 867                        sz = 0;
 868                } else {
 869                        qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
 870                        qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
 871                        sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
 872                                sizeof(struct rvt_rwqe);
 873                        if (udata)
 874                                qp->r_rq.wq = vmalloc_user(
 875                                                sizeof(struct rvt_rwq) +
 876                                                qp->r_rq.size * sz);
 877                        else
 878                                qp->r_rq.wq = vzalloc_node(
 879                                                sizeof(struct rvt_rwq) +
 880                                                qp->r_rq.size * sz,
 881                                                rdi->dparms.node);
 882                        if (!qp->r_rq.wq)
 883                                goto bail_driver_priv;
 884                }
 885
 886                /*
 887                 * ib_create_qp() will initialize qp->ibqp
 888                 * except for qp->ibqp.qp_num.
 889                 */
 890                spin_lock_init(&qp->r_lock);
 891                spin_lock_init(&qp->s_hlock);
 892                spin_lock_init(&qp->s_lock);
 893                spin_lock_init(&qp->r_rq.lock);
 894                atomic_set(&qp->refcount, 0);
 895                atomic_set(&qp->local_ops_pending, 0);
 896                init_waitqueue_head(&qp->wait);
 897                init_timer(&qp->s_timer);
 898                qp->s_timer.data = (unsigned long)qp;
 899                INIT_LIST_HEAD(&qp->rspwait);
 900                qp->state = IB_QPS_RESET;
 901                qp->s_wq = swq;
 902                qp->s_size = sqsize;
 903                qp->s_avail = init_attr->cap.max_send_wr;
 904                qp->s_max_sge = init_attr->cap.max_send_sge;
 905                if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
 906                        qp->s_flags = RVT_S_SIGNAL_REQ_WR;
 907
 908                err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
 909                                init_attr->qp_type,
 910                                init_attr->port_num);
 911                if (err < 0) {
 912                        ret = ERR_PTR(err);
 913                        goto bail_rq_wq;
 914                }
 915                qp->ibqp.qp_num = err;
 916                qp->port_num = init_attr->port_num;
 917                rvt_init_qp(rdi, qp, init_attr->qp_type);
 918                break;
 919
 920        default:
 921                /* Don't support raw QPs */
 922                return ERR_PTR(-EINVAL);
 923        }
 924
 925        init_attr->cap.max_inline_data = 0;
 926
 927        /*
 928         * Return the address of the RWQ as the offset to mmap.
 929         * See rvt_mmap() for details.
 930         */
 931        if (udata && udata->outlen >= sizeof(__u64)) {
 932                if (!qp->r_rq.wq) {
 933                        __u64 offset = 0;
 934
 935                        err = ib_copy_to_udata(udata, &offset,
 936                                               sizeof(offset));
 937                        if (err) {
 938                                ret = ERR_PTR(err);
 939                                goto bail_qpn;
 940                        }
 941                } else {
 942                        u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
 943
 944                        qp->ip = rvt_create_mmap_info(rdi, s,
 945                                                      ibpd->uobject->context,
 946                                                      qp->r_rq.wq);
 947                        if (!qp->ip) {
 948                                ret = ERR_PTR(-ENOMEM);
 949                                goto bail_qpn;
 950                        }
 951
 952                        err = ib_copy_to_udata(udata, &qp->ip->offset,
 953                                               sizeof(qp->ip->offset));
 954                        if (err) {
 955                                ret = ERR_PTR(err);
 956                                goto bail_ip;
 957                        }
 958                }
 959                qp->pid = current->pid;
 960        }
 961
 962        spin_lock(&rdi->n_qps_lock);
 963        if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
 964                spin_unlock(&rdi->n_qps_lock);
 965                ret = ERR_PTR(-ENOMEM);
 966                goto bail_ip;
 967        }
 968
 969        rdi->n_qps_allocated++;
 970        /*
 971         * Maintain a busy_jiffies variable that will be added to the timeout
 972         * period in mod_retry_timer and add_retry_timer. This busy jiffies
 973         * is scaled by the number of rc qps created for the device to reduce
 974         * the number of timeouts occurring when there is a large number of
 975         * qps. busy_jiffies is incremented every rc qp scaling interval.
 976         * The scaling interval is selected based on extensive performance
 977         * evaluation of targeted workloads.
 978         */
 979        if (init_attr->qp_type == IB_QPT_RC) {
 980                rdi->n_rc_qps++;
 981                rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
 982        }
 983        spin_unlock(&rdi->n_qps_lock);
 984
 985        if (qp->ip) {
 986                spin_lock_irq(&rdi->pending_lock);
 987                list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
 988                spin_unlock_irq(&rdi->pending_lock);
 989        }
 990
 991        ret = &qp->ibqp;
 992
 993        /*
  994         * We have our QP and it's good; now keep track of what types of opcodes
 995         * can be processed on this QP. We do this by keeping track of what the
 996         * 3 high order bits of the opcode are.
 997         */
 998        switch (init_attr->qp_type) {
 999        case IB_QPT_SMI:
1000        case IB_QPT_GSI:
1001        case IB_QPT_UD:
1002                qp->allowed_ops = IB_OPCODE_UD;
1003                break;
1004        case IB_QPT_RC:
1005                qp->allowed_ops = IB_OPCODE_RC;
1006                break;
1007        case IB_QPT_UC:
1008                qp->allowed_ops = IB_OPCODE_UC;
1009                break;
1010        default:
1011                ret = ERR_PTR(-EINVAL);
1012                goto bail_ip;
1013        }
1014
1015        return ret;
1016
1017bail_ip:
1018        if (qp->ip)
1019                kref_put(&qp->ip->ref, rvt_release_mmap_info);
1020
1021bail_qpn:
1022        rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
1023
1024bail_rq_wq:
1025        if (!qp->ip)
1026                vfree(qp->r_rq.wq);
1027
1028bail_driver_priv:
1029        rdi->driver_f.qp_priv_free(rdi, qp);
1030
1031bail_qp:
1032        kfree(qp->s_ack_queue);
1033        kfree(qp);
1034
1035bail_swq:
1036        vfree(swq);
1037
1038        return ret;
1039}
1040
1041/**
1042 * rvt_error_qp - put a QP into the error state
1043 * @qp: the QP to put into the error state
1044 * @err: the receive completion error to signal if a RWQE is active
1045 *
1046 * Flushes both send and receive work queues.
1047 *
1048 * Return: true if last WQE event should be generated.
1049 * The QP r_lock and s_lock should be held and interrupts disabled.
1050 * If we are already in error state, just return.
1051 */
1052int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
1053{
1054        struct ib_wc wc;
1055        int ret = 0;
1056        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
1057
1058        lockdep_assert_held(&qp->r_lock);
1059        lockdep_assert_held(&qp->s_lock);
1060        if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
1061                goto bail;
1062
1063        qp->state = IB_QPS_ERR;
1064
1065        if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1066                qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1067                del_timer(&qp->s_timer);
1068        }
1069
1070        if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
1071                qp->s_flags &= ~RVT_S_ANY_WAIT_SEND;
1072
1073        rdi->driver_f.notify_error_qp(qp);
1074
1075        /* Schedule the sending tasklet to drain the send work queue. */
1076        if (ACCESS_ONCE(qp->s_last) != qp->s_head)
1077                rdi->driver_f.schedule_send(qp);
1078
1079        rvt_clear_mr_refs(qp, 0);
1080
1081        memset(&wc, 0, sizeof(wc));
1082        wc.qp = &qp->ibqp;
1083        wc.opcode = IB_WC_RECV;
1084
1085        if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) {
1086                wc.wr_id = qp->r_wr_id;
1087                wc.status = err;
1088                rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1089        }
1090        wc.status = IB_WC_WR_FLUSH_ERR;
1091
1092        if (qp->r_rq.wq) {
1093                struct rvt_rwq *wq;
1094                u32 head;
1095                u32 tail;
1096
1097                spin_lock(&qp->r_rq.lock);
1098
1099                /* sanity check pointers before trusting them */
1100                wq = qp->r_rq.wq;
1101                head = wq->head;
1102                if (head >= qp->r_rq.size)
1103                        head = 0;
1104                tail = wq->tail;
1105                if (tail >= qp->r_rq.size)
1106                        tail = 0;
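                     /* flush each posted but unconsumed RWQE with a flush error */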
1107                while (tail != head) {
1108                        wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
1109                        if (++tail >= qp->r_rq.size)
1110                                tail = 0;
1111                        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1112                }
1113                wq->tail = tail;
1114
1115                spin_unlock(&qp->r_rq.lock);
1116        } else if (qp->ibqp.event_handler) {
1117                ret = 1;
1118        }
1119
1120bail:
1121        return ret;
1122}
1123EXPORT_SYMBOL(rvt_error_qp);
1124
1125/*
1126 * Put the QP into the hash table.
1127 * The hash table holds a reference to the QP.
1128 */
1129static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
1130{
1131        struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
1132        unsigned long flags;
1133
1134        rvt_get_qp(qp);
1135        spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
1136
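             /* QP0 (SMI) and QP1 (GSI) live in per-port pointers, not the hash */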
1137        if (qp->ibqp.qp_num <= 1) {
1138                rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp);
1139        } else {
1140                u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
1141
1142                qp->next = rdi->qp_dev->qp_table[n];
1143                rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp);
1144                trace_rvt_qpinsert(qp, n);
1145        }
1146
1147        spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
1148}
1149
1150/**
1151 * rvt_modify_qp - modify the attributes of a queue pair
1152 * @ibqp: the queue pair whose attributes we're modifying
1153 * @attr: the new attributes
1154 * @attr_mask: the mask of attributes to modify
1155 * @udata: user data for libibverbs.so
1156 *
1157 * Return: 0 on success, otherwise returns an errno.
1158 */
1159int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1160                  int attr_mask, struct ib_udata *udata)
1161{
1162        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1163        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1164        enum ib_qp_state cur_state, new_state;
1165        struct ib_event ev;
1166        int lastwqe = 0;
1167        int mig = 0;
1168        int pmtu = 0; /* for gcc warning only */
1169        enum rdma_link_layer link;
1170        int opa_ah;
1171
1172        link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
1173
1174        spin_lock_irq(&qp->r_lock);
1175        spin_lock(&qp->s_hlock);
1176        spin_lock(&qp->s_lock);
1177
1178        cur_state = attr_mask & IB_QP_CUR_STATE ?
1179                attr->cur_qp_state : qp->state;
1180        new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1181        opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num);
1182
1183        if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1184                                attr_mask, link))
1185                goto inval;
1186
1187        if (rdi->driver_f.check_modify_qp &&
1188            rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
1189                goto inval;
1190
1191        if (attr_mask & IB_QP_AV) {
1192                if (opa_ah) {
1193                        if (rdma_ah_get_dlid(&attr->ah_attr) >=
1194                                opa_get_mcast_base(OPA_MCAST_NR))
1195                                goto inval;
1196                } else {
1197                        if (rdma_ah_get_dlid(&attr->ah_attr) >=
1198                                be16_to_cpu(IB_MULTICAST_LID_BASE))
1199                                goto inval;
1200                }
1201
1202                if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
1203                        goto inval;
1204        }
1205
1206        if (attr_mask & IB_QP_ALT_PATH) {
1207                if (opa_ah) {
1208                        if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
1209                                opa_get_mcast_base(OPA_MCAST_NR))
1210                                goto inval;
1211                } else {
1212                        if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
1213                                be16_to_cpu(IB_MULTICAST_LID_BASE))
1214                                goto inval;
1215                }
1216
1217                if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
1218                        goto inval;
1219                if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
1220                        goto inval;
1221        }
1222
1223        if (attr_mask & IB_QP_PKEY_INDEX)
1224                if (attr->pkey_index >= rvt_get_npkeys(rdi))
1225                        goto inval;
1226
1227        if (attr_mask & IB_QP_MIN_RNR_TIMER)
1228                if (attr->min_rnr_timer > 31)
1229                        goto inval;
1230
1231        if (attr_mask & IB_QP_PORT)
1232                if (qp->ibqp.qp_type == IB_QPT_SMI ||
1233                    qp->ibqp.qp_type == IB_QPT_GSI ||
1234                    attr->port_num == 0 ||
1235                    attr->port_num > ibqp->device->phys_port_cnt)
1236                        goto inval;
1237
1238        if (attr_mask & IB_QP_DEST_QPN)
1239                if (attr->dest_qp_num > RVT_QPN_MASK)
1240                        goto inval;
1241
1242        if (attr_mask & IB_QP_RETRY_CNT)
1243                if (attr->retry_cnt > 7)
1244                        goto inval;
1245
1246        if (attr_mask & IB_QP_RNR_RETRY)
1247                if (attr->rnr_retry > 7)
1248                        goto inval;
1249
1250        /*
1251         * Don't allow invalid path_mtu values.  OK to set greater
1252         * than the active mtu (or even the max_cap, if we have tuned
1253         * that to a small mtu).  We'll set qp->path_mtu
1254         * to the lesser of requested attribute mtu and active,
1255         * for packetizing messages.
1256         * Note that the QP port has to be set in INIT and MTU in RTR.
1257         */
1258        if (attr_mask & IB_QP_PATH_MTU) {
1259                pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr);
1260                if (pmtu < 0)
1261                        goto inval;
1262        }
1263
1264        if (attr_mask & IB_QP_PATH_MIG_STATE) {
1265                if (attr->path_mig_state == IB_MIG_REARM) {
1266                        if (qp->s_mig_state == IB_MIG_ARMED)
1267                                goto inval;
1268                        if (new_state != IB_QPS_RTS)
1269                                goto inval;
1270                } else if (attr->path_mig_state == IB_MIG_MIGRATED) {
1271                        if (qp->s_mig_state == IB_MIG_REARM)
1272                                goto inval;
1273                        if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
1274                                goto inval;
1275                        if (qp->s_mig_state == IB_MIG_ARMED)
1276                                mig = 1;
1277                } else {
1278                        goto inval;
1279                }
1280        }
1281
1282        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1283                if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic)
1284                        goto inval;
1285
1286        switch (new_state) {
1287        case IB_QPS_RESET:
1288                if (qp->state != IB_QPS_RESET)
1289                        rvt_reset_qp(rdi, qp, ibqp->qp_type);
1290                break;
1291
1292        case IB_QPS_RTR:
1293                /* Allow event to re-trigger if QP set to RTR more than once */
1294                qp->r_flags &= ~RVT_R_COMM_EST;
1295                qp->state = new_state;
1296                break;
1297
1298        case IB_QPS_SQD:
1299                qp->s_draining = qp->s_last != qp->s_cur;
1300                qp->state = new_state;
1301                break;
1302
1303        case IB_QPS_SQE:
1304                if (qp->ibqp.qp_type == IB_QPT_RC)
1305                        goto inval;
1306                qp->state = new_state;
1307                break;
1308
1309        case IB_QPS_ERR:
1310                lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1311                break;
1312
1313        default:
1314                qp->state = new_state;
1315                break;
1316        }
1317
1318        if (attr_mask & IB_QP_PKEY_INDEX)
1319                qp->s_pkey_index = attr->pkey_index;
1320
1321        if (attr_mask & IB_QP_PORT)
1322                qp->port_num = attr->port_num;
1323
1324        if (attr_mask & IB_QP_DEST_QPN)
1325                qp->remote_qpn = attr->dest_qp_num;
1326
1327        if (attr_mask & IB_QP_SQ_PSN) {
1328                qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask;
1329                qp->s_psn = qp->s_next_psn;
1330                qp->s_sending_psn = qp->s_next_psn;
1331                qp->s_last_psn = qp->s_next_psn - 1;
1332                qp->s_sending_hpsn = qp->s_last_psn;
1333        }
1334
1335        if (attr_mask & IB_QP_RQ_PSN)
1336                qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask;
1337
1338        if (attr_mask & IB_QP_ACCESS_FLAGS)
1339                qp->qp_access_flags = attr->qp_access_flags;
1340
1341        if (attr_mask & IB_QP_AV) {
1342                qp->remote_ah_attr = attr->ah_attr;
1343                qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
1344                qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
1345        }
1346
1347        if (attr_mask & IB_QP_ALT_PATH) {
1348                qp->alt_ah_attr = attr->alt_ah_attr;
1349                qp->s_alt_pkey_index = attr->alt_pkey_index;
1350        }
1351
1352        if (attr_mask & IB_QP_PATH_MIG_STATE) {
1353                qp->s_mig_state = attr->path_mig_state;
1354                if (mig) {
1355                        qp->remote_ah_attr = qp->alt_ah_attr;
1356                        qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
1357                        qp->s_pkey_index = qp->s_alt_pkey_index;
1358                }
1359        }
1360
1361        if (attr_mask & IB_QP_PATH_MTU) {
1362                qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
1363                qp->log_pmtu = ilog2(qp->pmtu);
1364        }
1365
1366        if (attr_mask & IB_QP_RETRY_CNT) {
1367                qp->s_retry_cnt = attr->retry_cnt;
1368                qp->s_retry = attr->retry_cnt;
1369        }
1370
1371        if (attr_mask & IB_QP_RNR_RETRY) {
1372                qp->s_rnr_retry_cnt = attr->rnr_retry;
1373                qp->s_rnr_retry = attr->rnr_retry;
1374        }
1375
1376        if (attr_mask & IB_QP_MIN_RNR_TIMER)
1377                qp->r_min_rnr_timer = attr->min_rnr_timer;
1378
1379        if (attr_mask & IB_QP_TIMEOUT) {
1380                qp->timeout = attr->timeout;
1381                qp->timeout_jiffies = rvt_timeout_to_jiffies(qp->timeout);
1382        }
1383
1384        if (attr_mask & IB_QP_QKEY)
1385                qp->qkey = attr->qkey;
1386
1387        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1388                qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
1389
1390        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
1391                qp->s_max_rd_atomic = attr->max_rd_atomic;
1392
1393        if (rdi->driver_f.modify_qp)
1394                rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);
1395
1396        spin_unlock(&qp->s_lock);
1397        spin_unlock(&qp->s_hlock);
1398        spin_unlock_irq(&qp->r_lock);
1399
1400        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1401                rvt_insert_qp(rdi, qp);
1402
1403        if (lastwqe) {
1404                ev.device = qp->ibqp.device;
1405                ev.element.qp = &qp->ibqp;
1406                ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1407                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1408        }
1409        if (mig) {
1410                ev.device = qp->ibqp.device;
1411                ev.element.qp = &qp->ibqp;
1412                ev.event = IB_EVENT_PATH_MIG;
1413                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1414        }
1415        return 0;
1416
1417inval:
1418        spin_unlock(&qp->s_lock);
1419        spin_unlock(&qp->s_hlock);
1420        spin_unlock_irq(&qp->r_lock);
1421        return -EINVAL;
1422}
1423
1424/**
1425 * rvt_destroy_qp - destroy a queue pair
1426 * @ibqp: the queue pair to destroy
1427 *
1428 * Note that this can be called while the QP is actively sending or
1429 * receiving!
1430 *
1431 * Return: 0 on success.
1432 */
1433int rvt_destroy_qp(struct ib_qp *ibqp)
1434{
1435        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1436        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1437
1438        spin_lock_irq(&qp->r_lock);
1439        spin_lock(&qp->s_hlock);
1440        spin_lock(&qp->s_lock);
1441        rvt_reset_qp(rdi, qp, ibqp->qp_type);
1442        spin_unlock(&qp->s_lock);
1443        spin_unlock(&qp->s_hlock);
1444        spin_unlock_irq(&qp->r_lock);
1445
1446        /* qpn is now available for use again */
1447        rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
1448
1449        spin_lock(&rdi->n_qps_lock);
1450        rdi->n_qps_allocated--;
1451        if (qp->ibqp.qp_type == IB_QPT_RC) {
1452                rdi->n_rc_qps--;
1453                rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
1454        }
1455        spin_unlock(&rdi->n_qps_lock);
1456
1457        if (qp->ip)
1458                kref_put(&qp->ip->ref, rvt_release_mmap_info);
1459        else
1460                vfree(qp->r_rq.wq);
1461        vfree(qp->s_wq);
1462        rdi->driver_f.qp_priv_free(rdi, qp);
1463        kfree(qp->s_ack_queue);
1464        kfree(qp);
1465        return 0;
1466}
1467
1468/**
1469 * rvt_query_qp - query an ibqp
1470 * @ibqp: IB qp to query
1471 * @attr: attr struct to fill in
1472 * @attr_mask: attr mask ignored
1473 * @init_attr: struct to fill in
1474 *
1475 * Return: always 0
1476 */
1477int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1478                 int attr_mask, struct ib_qp_init_attr *init_attr)
1479{
1480        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1481        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1482
1483        attr->qp_state = qp->state;
1484        attr->cur_qp_state = attr->qp_state;
1485        attr->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
1486        attr->path_mig_state = qp->s_mig_state;
1487        attr->qkey = qp->qkey;
1488        attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask;
1489        attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask;
1490        attr->dest_qp_num = qp->remote_qpn;
1491        attr->qp_access_flags = qp->qp_access_flags;
1492        attr->cap.max_send_wr = qp->s_size - 1 -
1493                rdi->dparms.reserved_operations;
1494        attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
1495        attr->cap.max_send_sge = qp->s_max_sge;
1496        attr->cap.max_recv_sge = qp->r_rq.max_sge;
1497        attr->cap.max_inline_data = 0;
1498        attr->ah_attr = qp->remote_ah_attr;
1499        attr->alt_ah_attr = qp->alt_ah_attr;
1500        attr->pkey_index = qp->s_pkey_index;
1501        attr->alt_pkey_index = qp->s_alt_pkey_index;
1502        attr->en_sqd_async_notify = 0;
1503        attr->sq_draining = qp->s_draining;
1504        attr->max_rd_atomic = qp->s_max_rd_atomic;
1505        attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
1506        attr->min_rnr_timer = qp->r_min_rnr_timer;
1507        attr->port_num = qp->port_num;
1508        attr->timeout = qp->timeout;
1509        attr->retry_cnt = qp->s_retry_cnt;
1510        attr->rnr_retry = qp->s_rnr_retry_cnt;
1511        attr->alt_port_num =
1512                rdma_ah_get_port_num(&qp->alt_ah_attr);
1513        attr->alt_timeout = qp->alt_timeout;
1514
1515        init_attr->event_handler = qp->ibqp.event_handler;
1516        init_attr->qp_context = qp->ibqp.qp_context;
1517        init_attr->send_cq = qp->ibqp.send_cq;
1518        init_attr->recv_cq = qp->ibqp.recv_cq;
1519        init_attr->srq = qp->ibqp.srq;
1520        init_attr->cap = attr->cap;
1521        if (qp->s_flags & RVT_S_SIGNAL_REQ_WR)
1522                init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
1523        else
1524                init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1525        init_attr->qp_type = qp->ibqp.qp_type;
1526        init_attr->port_num = qp->port_num;
1527        return 0;
1528}
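/*
 * Illustrative sketch (not part of this driver): rvt_query_qp() is
 * normally reached through ib_query_qp().  A caller interested in the
 * current state and path MTU might do the following; note that, as the
 * code above shows, all attributes are filled in regardless of the
 * mask passed.
 *
 *      struct ib_qp_attr attr;
 *      struct ib_qp_init_attr init_attr;
 *      int ret = ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PATH_MTU,
 *                            &init_attr);
 *      if (!ret)
 *              pr_info("qp %u state %d mtu %d\n",
 *                      qp->qp_num, attr.qp_state, attr.path_mtu);
 */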
1529
1530/**
1531 * rvt_post_recv - post a receive on a QP
1532 * @ibqp: the QP to post the receive on
1533 * @wr: the WR to post
1534 * @bad_wr: the first bad WR is put here
1535 *
1536 * This may be called from interrupt context.
1537 *
1538 * Return: 0 on success otherwise errno
1539 */
1540int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1541                  struct ib_recv_wr **bad_wr)
1542{
1543        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1544        struct rvt_rwq *wq = qp->r_rq.wq;
1545        unsigned long flags;
1546        int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
1547                                !qp->ibqp.srq;
1548
1549        /* Check that state is OK to post receive. */
1550        if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) {
1551                *bad_wr = wr;
1552                return -EINVAL;
1553        }
1554
1555        for (; wr; wr = wr->next) {
1556                struct rvt_rwqe *wqe;
1557                u32 next;
1558                int i;
1559
1560                if ((unsigned)wr->num_sge > qp->r_rq.max_sge) {
1561                        *bad_wr = wr;
1562                        return -EINVAL;
1563                }
1564
1565                spin_lock_irqsave(&qp->r_rq.lock, flags);
1566                next = wq->head + 1;
1567                if (next >= qp->r_rq.size)
1568                        next = 0;
1569                if (next == wq->tail) {
1570                        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1571                        *bad_wr = wr;
1572                        return -ENOMEM;
1573                }
1574                if (unlikely(qp_err_flush)) {
1575                        struct ib_wc wc;
1576
1577                        memset(&wc, 0, sizeof(wc));
1578                        wc.qp = &qp->ibqp;
1579                        wc.opcode = IB_WC_RECV;
1580                        wc.wr_id = wr->wr_id;
1581                        wc.status = IB_WC_WR_FLUSH_ERR;
1582                        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1583                } else {
1584                        wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
1585                        wqe->wr_id = wr->wr_id;
1586                        wqe->num_sge = wr->num_sge;
1587                        for (i = 0; i < wr->num_sge; i++)
1588                                wqe->sg_list[i] = wr->sg_list[i];
1589                        /*
1590                         * Make sure queue entry is written
1591                         * before the head index.
1592                         */
1593                        smp_wmb();
1594                        wq->head = next;
1595                }
1596                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1597        }
1598        return 0;
1599}
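/*
 * Illustrative sketch (not part of this driver): a kernel consumer
 * reaches rvt_post_recv() through ib_post_recv().  The buffer address,
 * length, lkey and context below are placeholders for a registered MR.
 *
 *      struct ib_sge sge = {
 *              .addr   = dma_addr,
 *              .length = buf_len,
 *              .lkey   = mr->lkey,
 *      };
 *      struct ib_recv_wr wr = {
 *              .wr_id   = (uintptr_t)ctx,
 *              .sg_list = &sge,
 *              .num_sge = 1,
 *      };
 *      struct ib_recv_wr *bad_wr;
 *      int ret = ib_post_recv(qp, &wr, &bad_wr);
 */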
1600
1601/**
1602 * rvt_qp_valid_operation - validate post send wr request
1603 * @qp: the qp
1604 * @post_parms: the post send table for the driver
1605 * @wr: the work request
1606 *
1607 * The routine validates the operation based on the
1608 * validation table and returns the length of the operation,
1609 * which can extend beyond the ib_send_wr.  Operation-dependent
1610 * flags select the atomic operation checks.
1611 *
1612 * There is an exception for UD qps that validates the pd and
1613 * overrides the length to include the additional UD specific
1614 * length.
1615 *
1616 * Returns a negative error or the length of the work request
1617 * for building the swqe.
1618 */
1619static inline int rvt_qp_valid_operation(
1620        struct rvt_qp *qp,
1621        const struct rvt_operation_params *post_parms,
1622        struct ib_send_wr *wr)
1623{
1624        int len;
1625
1626        if (wr->opcode >= RVT_OPERATION_MAX || !post_parms[wr->opcode].length)
1627                return -EINVAL;
1628        if (!(post_parms[wr->opcode].qpt_support & BIT(qp->ibqp.qp_type)))
1629                return -EINVAL;
1630        if ((post_parms[wr->opcode].flags & RVT_OPERATION_PRIV) &&
1631            ibpd_to_rvtpd(qp->ibqp.pd)->user)
1632                return -EINVAL;
1633        if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC_SGE &&
1634            (wr->num_sge == 0 ||
1635             wr->sg_list[0].length < sizeof(u64) ||
1636             wr->sg_list[0].addr & (sizeof(u64) - 1)))
1637                return -EINVAL;
1638        if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC &&
1639            !qp->s_max_rd_atomic)
1640                return -EINVAL;
1641        len = post_parms[wr->opcode].length;
1642        /* UD specific */
1643        if (qp->ibqp.qp_type != IB_QPT_UC &&
1644            qp->ibqp.qp_type != IB_QPT_RC) {
1645                if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
1646                        return -EINVAL;
1647                len = sizeof(struct ib_ud_wr);
1648        }
1649        return len;
1650}
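/*
 * Illustrative sketch (not part of this file): rdi->post_parms is a
 * per-driver table indexed by opcode.  A hypothetical RDMA WRITE entry
 * might look roughly like this, so that the checks above reject, for
 * example, an RDMA WRITE posted on a UD QP:
 *
 *      [IB_WR_RDMA_WRITE] = {
 *              .length = sizeof(struct ib_rdma_wr),
 *              .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
 *      },
 */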
1651
1652/**
1653 * rvt_qp_is_avail - determine queue capacity
1654 * @qp: the qp
1655 * @rdi: the rdmavt device
1656 * @reserved_op: true for a reserved operation
1657 *
1658 * This assumes the s_hlock is held but the s_last
1659 * qp variable may still change under the caller.
1660 *
1661 * For non-reserved operations, qp->s_avail
1662 * may be updated.
1663 *
1664 * Return: 0 if a send slot is available, -ENOMEM otherwise.
1665 */
1666static inline int rvt_qp_is_avail(
1667        struct rvt_qp *qp,
1668        struct rvt_dev_info *rdi,
1669        bool reserved_op)
1670{
1671        u32 slast;
1672        u32 avail;
1673        u32 reserved_used;
1674
1675        /* see rvt_qp_wqe_unreserve() */
1676        smp_mb__before_atomic();
1677        reserved_used = atomic_read(&qp->s_reserved_used);
1678        if (unlikely(reserved_op)) {
1679                /* see rvt_qp_wqe_unreserve() */
1680                smp_mb__before_atomic();
1681                if (reserved_used >= rdi->dparms.reserved_operations)
1682                        return -ENOMEM;
1683                return 0;
1684        }
1685        /* non-reserved operations */
1686        if (likely(qp->s_avail))
1687                return 0;
1688        smp_read_barrier_depends(); /* see rc.c */
1689        slast = ACCESS_ONCE(qp->s_last);
1690        if (qp->s_head >= slast)
1691                avail = qp->s_size - (qp->s_head - slast);
1692        else
1693                avail = slast - qp->s_head;
1694
1695        /* see rvt_qp_wqe_unreserve() */
1696        smp_mb__before_atomic();
1697        reserved_used = atomic_read(&qp->s_reserved_used);
1698        avail = avail - 1 -
1699                (rdi->dparms.reserved_operations - reserved_used);
1700        /* ensure we don't assign a negative s_avail */
1701        if ((s32)avail <= 0)
1702                return -ENOMEM;
1703        qp->s_avail = avail;
1704        if (WARN_ON(qp->s_avail >
1705                    (qp->s_size - 1 - rdi->dparms.reserved_operations)))
1706                rvt_pr_err(rdi,
1707                           "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
1708                           qp->ibqp.qp_num, qp->s_size, qp->s_avail,
1709                           qp->s_head, qp->s_tail, qp->s_cur,
1710                           qp->s_acked, qp->s_last);
1711        return 0;
1712}
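/*
 * Worked example for the computation above (hypothetical values): with
 * s_size = 32, s_head = 10, s_last = 5, one reserved operation
 * configured and none in use, avail = 32 - (10 - 5) = 27, and the
 * usable count becomes 27 - 1 - (1 - 0) = 25, so the post proceeds and
 * qp->s_avail is set to 25.
 */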
1713
1714/**
1715 * rvt_post_one_wr - post one RC, UC, or UD send work request
1716 * @qp: the QP to post on
1717 * @wr: the work request to send
1718 */
1719static int rvt_post_one_wr(struct rvt_qp *qp,
1720                           struct ib_send_wr *wr,
1721                           int *call_send)
1722{
1723        struct rvt_swqe *wqe;
1724        u32 next;
1725        int i;
1726        int j;
1727        int acc;
1728        struct rvt_lkey_table *rkt;
1729        struct rvt_pd *pd;
1730        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
1731        u8 log_pmtu;
1732        int ret;
1733        size_t cplen;
1734        bool reserved_op;
1735        int local_ops_delayed = 0;
1736
1737        BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE));
1738
1739        /* IB spec says that num_sge == 0 is OK. */
1740        if (unlikely(wr->num_sge > qp->s_max_sge))
1741                return -EINVAL;
1742
1743        ret = rvt_qp_valid_operation(qp, rdi->post_parms, wr);
1744        if (ret < 0)
1745                return ret;
1746        cplen = ret;
1747
1748        /*
1749         * Local operations include fast register and local invalidate.
1750         * Fast register needs to be processed immediately because the
1751         * registered lkey may be used by following work requests and the
1752         * lkey needs to be valid at the time those requests are posted.
1753         * Local invalidate can be processed immediately if fencing is
1754         * not required and no previous local invalidate ops are pending.
1755         * Signaled local operations that have been processed immediately
1756         * need to have requests with "completion only" flags set posted
1757         * to the send queue in order to generate completions.
1758         */
1759        if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) {
1760                switch (wr->opcode) {
1761                case IB_WR_REG_MR:
1762                        ret = rvt_fast_reg_mr(qp,
1763                                              reg_wr(wr)->mr,
1764                                              reg_wr(wr)->key,
1765                                              reg_wr(wr)->access);
1766                        if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
1767                                return ret;
1768                        break;
1769                case IB_WR_LOCAL_INV:
1770                        if ((wr->send_flags & IB_SEND_FENCE) ||
1771                            atomic_read(&qp->local_ops_pending)) {
1772                                local_ops_delayed = 1;
1773                        } else {
1774                                ret = rvt_invalidate_rkey(
1775                                        qp, wr->ex.invalidate_rkey);
1776                                if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
1777                                        return ret;
1778                        }
1779                        break;
1780                default:
1781                        return -EINVAL;
1782                }
1783        }
1784
1785        reserved_op = rdi->post_parms[wr->opcode].flags &
1786                        RVT_OPERATION_USE_RESERVE;
1787        /* check for avail */
1788        ret = rvt_qp_is_avail(qp, rdi, reserved_op);
1789        if (ret)
1790                return ret;
1791        next = qp->s_head + 1;
1792        if (next >= qp->s_size)
1793                next = 0;
1794
1795        rkt = &rdi->lkey_table;
1796        pd = ibpd_to_rvtpd(qp->ibqp.pd);
1797        wqe = rvt_get_swqe_ptr(qp, qp->s_head);
1798
1799        /* cplen has length from above */
1800        memcpy(&wqe->wr, wr, cplen);
1801
1802        wqe->length = 0;
1803        j = 0;
1804        if (wr->num_sge) {
1805                struct rvt_sge *last_sge = NULL;
1806
1807                acc = wr->opcode >= IB_WR_RDMA_READ ?
1808                        IB_ACCESS_LOCAL_WRITE : 0;
1809                for (i = 0; i < wr->num_sge; i++) {
1810                        u32 length = wr->sg_list[i].length;
1811
1812                        if (length == 0)
1813                                continue;
1814                        ret = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge,
1815                                          &wr->sg_list[i], acc);
1816                        if (unlikely(ret < 0))
1817                                goto bail_inval_free;
1818                        wqe->length += length;
1819                        if (ret)
1820                                last_sge = &wqe->sg_list[j];
1821                        j += ret;
1822                }
1823                wqe->wr.num_sge = j;
1824        }
1825
1826        /* general part of wqe valid - allow for driver checks */
1827        if (rdi->driver_f.check_send_wqe) {
1828                ret = rdi->driver_f.check_send_wqe(qp, wqe);
1829                if (ret < 0)
1830                        goto bail_inval_free;
1831                if (ret)
1832                        *call_send = ret;
1833        }
1834
1835        log_pmtu = qp->log_pmtu;
1836        if (qp->ibqp.qp_type != IB_QPT_UC &&
1837            qp->ibqp.qp_type != IB_QPT_RC) {
1838                struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
1839
1840                log_pmtu = ah->log_pmtu;
1841                atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
1842        }
1843
1844        if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) {
1845                if (local_ops_delayed)
1846                        atomic_inc(&qp->local_ops_pending);
1847                else
1848                        wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY;
1849                wqe->ssn = 0;
1850                wqe->psn = 0;
1851                wqe->lpsn = 0;
1852        } else {
1853                wqe->ssn = qp->s_ssn++;
1854                wqe->psn = qp->s_next_psn;
1855                wqe->lpsn = wqe->psn +
1856                                (wqe->length ?
1857                                        ((wqe->length - 1) >> log_pmtu) :
1858                                        0);
1859                qp->s_next_psn = wqe->lpsn + 1;
1860        }
1861        if (unlikely(reserved_op)) {
1862                wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
1863                rvt_qp_wqe_reserve(qp, wqe);
1864        } else {
1865                wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
1866                qp->s_avail--;
1867        }
1868        trace_rvt_post_one_wr(qp, wqe, wr->num_sge);
1869        smp_wmb(); /* see request builders */
1870        qp->s_head = next;
1871
1872        return 0;
1873
1874bail_inval_free:
1875        /* release mr holds */
1876        while (j) {
1877                struct rvt_sge *sge = &wqe->sg_list[--j];
1878
1879                rvt_put_mr(sge->mr);
1880        }
1881        return ret;
1882}
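/*
 * Worked example for the PSN assignment above (hypothetical values): an
 * 8192 byte RC send over a 4096 byte path MTU (log_pmtu = 12) with
 * wqe->psn = 100 spans two packets, so wqe->lpsn = 100 +
 * ((8192 - 1) >> 12) = 101 and qp->s_next_psn advances to 102.
 */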
1883
1884/**
1885 * rvt_post_send - post a send on a QP
1886 * @ibqp: the QP to post the send on
1887 * @wr: the list of work requests to post
1888 * @bad_wr: the first bad WR is put here
1889 *
1890 * This may be called from interrupt context.
1891 *
1892 * Return: 0 on success else errno
1893 */
1894int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1895                  struct ib_send_wr **bad_wr)
1896{
1897        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1898        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1899        unsigned long flags = 0;
1900        int call_send;
1901        unsigned nreq = 0;
1902        int err = 0;
1903
1904        spin_lock_irqsave(&qp->s_hlock, flags);
1905
1906        /*
1907         * Ensure QP state is such that we can send. If not, bail out early;
1908         * there is no need to do this every time we post a send.
1909         */
1910        if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
1911                spin_unlock_irqrestore(&qp->s_hlock, flags);
1912                return -EINVAL;
1913        }
1914
1915        /*
1916         * If the send queue is empty and we only have a single WR, then just go
1917         * ahead and kick the send engine into gear. Otherwise we will always
1918         * just schedule the send to happen later.
1919         */
1920        call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
1921
1922        for (; wr; wr = wr->next) {
1923                err = rvt_post_one_wr(qp, wr, &call_send);
1924                if (unlikely(err)) {
1925                        *bad_wr = wr;
1926                        goto bail;
1927                }
1928                nreq++;
1929        }
1930bail:
1931        spin_unlock_irqrestore(&qp->s_hlock, flags);
1932        if (nreq) {
1933                if (call_send)
1934                        rdi->driver_f.do_send(qp);
1935                else
1936                        rdi->driver_f.schedule_send_no_lock(qp);
1937        }
1938        return err;
1939}
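/*
 * Illustrative sketch (not part of this driver): rvt_post_send() is
 * reached through ib_post_send().  A single signaled SEND with one SGE
 * (placeholder buffer, lkey and context) might be posted like this; if
 * the send queue is empty and wr.next is NULL, the code above calls the
 * driver's do_send() directly instead of scheduling it.
 *
 *      struct ib_sge sge = {
 *              .addr   = dma_addr,
 *              .length = buf_len,
 *              .lkey   = mr->lkey,
 *      };
 *      struct ib_send_wr wr = {
 *              .wr_id      = (uintptr_t)ctx,
 *              .sg_list    = &sge,
 *              .num_sge    = 1,
 *              .opcode     = IB_WR_SEND,
 *              .send_flags = IB_SEND_SIGNALED,
 *      };
 *      struct ib_send_wr *bad_wr;
 *      int ret = ib_post_send(qp, &wr, &bad_wr);
 */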
1940
1941/**
1942 * rvt_post_srq_recv - post a receive on a shared receive queue
1943 * @ibsrq: the SRQ to post the receive on
1944 * @wr: the list of work requests to post
1945 * @bad_wr: A pointer to the first WR to cause a problem is put here
1946 *
1947 * This may be called from interrupt context.
1948 *
1949 * Return: 0 on success else errno
1950 */
1951int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
1952                      struct ib_recv_wr **bad_wr)
1953{
1954        struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
1955        struct rvt_rwq *wq;
1956        unsigned long flags;
1957
1958        for (; wr; wr = wr->next) {
1959                struct rvt_rwqe *wqe;
1960                u32 next;
1961                int i;
1962
1963                if ((unsigned)wr->num_sge > srq->rq.max_sge) {
1964                        *bad_wr = wr;
1965                        return -EINVAL;
1966                }
1967
1968                spin_lock_irqsave(&srq->rq.lock, flags);
1969                wq = srq->rq.wq;
1970                next = wq->head + 1;
1971                if (next >= srq->rq.size)
1972                        next = 0;
1973                if (next == wq->tail) {
1974                        spin_unlock_irqrestore(&srq->rq.lock, flags);
1975                        *bad_wr = wr;
1976                        return -ENOMEM;
1977                }
1978
1979                wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
1980                wqe->wr_id = wr->wr_id;
1981                wqe->num_sge = wr->num_sge;
1982                for (i = 0; i < wr->num_sge; i++)
1983                        wqe->sg_list[i] = wr->sg_list[i];
1984                /* Make sure queue entry is written before the head index. */
1985                smp_wmb();
1986                wq->head = next;
1987                spin_unlock_irqrestore(&srq->rq.lock, flags);
1988        }
1989        return 0;
1990}
1991
1992/**
1993 * rvt_comm_est - handle trap with QP established
1994 * @qp: the QP
1995 */
1996void rvt_comm_est(struct rvt_qp *qp)
1997{
1998        qp->r_flags |= RVT_R_COMM_EST;
1999        if (qp->ibqp.event_handler) {
2000                struct ib_event ev;
2001
2002                ev.device = qp->ibqp.device;
2003                ev.element.qp = &qp->ibqp;
2004                ev.event = IB_EVENT_COMM_EST;
2005                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
2006        }
2007}
2008EXPORT_SYMBOL(rvt_comm_est);
2009
2010void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
2011{
2012        unsigned long flags;
2013        int lastwqe;
2014
2015        spin_lock_irqsave(&qp->s_lock, flags);
2016        lastwqe = rvt_error_qp(qp, err);
2017        spin_unlock_irqrestore(&qp->s_lock, flags);
2018
2019        if (lastwqe) {
2020                struct ib_event ev;
2021
2022                ev.device = qp->ibqp.device;
2023                ev.element.qp = &qp->ibqp;
2024                ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
2025                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
2026        }
2027}
2028EXPORT_SYMBOL(rvt_rc_error);
2029
2030/*
2031 *  rvt_rnr_tbl_to_usec - convert an index into ib_rvt_rnr_table to usec
2032 *  @index: the index
2033 *  Return: usec from an index into ib_rvt_rnr_table
2034 */
2035unsigned long rvt_rnr_tbl_to_usec(u32 index)
2036{
2037        return ib_rvt_rnr_table[(index & IB_AETH_CREDIT_MASK)];
2038}
2039EXPORT_SYMBOL(rvt_rnr_tbl_to_usec);
2040
2041static inline unsigned long rvt_aeth_to_usec(u32 aeth)
2042{
2043        return ib_rvt_rnr_table[(aeth >> IB_AETH_CREDIT_SHIFT) &
2044                                  IB_AETH_CREDIT_MASK];
2045}
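/*
 * Worked example (hypothetical AETH value): assuming the ib_hdrs.h
 * values IB_AETH_CREDIT_SHIFT == 24 and IB_AETH_CREDIT_MASK == 0x1f, an
 * AETH of 0x0c000000 yields ((0x0c000000 >> 24) & 0x1f) = 0x0c, so the
 * 0x0C entry of ib_rvt_rnr_table supplies the RNR delay in
 * microseconds.
 */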
2046
2047/*
2048 *  rvt_add_retry_timer - add/start a retry timer
2049 *  @qp: the QP
2050 *  add a retry timer on the QP
2051 */
2052void rvt_add_retry_timer(struct rvt_qp *qp)
2053{
2054        struct ib_qp *ibqp = &qp->ibqp;
2055        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
2056
2057        lockdep_assert_held(&qp->s_lock);
2058        qp->s_flags |= RVT_S_TIMER;
2059        /* 4.096 usec * (1 << qp->timeout) */
2060        qp->s_timer.expires = jiffies + qp->timeout_jiffies +
2061                             rdi->busy_jiffies;
2062        add_timer(&qp->s_timer);
2063}
2064EXPORT_SYMBOL(rvt_add_retry_timer);
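/*
 * Worked example (hypothetical value): per the comment above, the retry
 * timeout is 4.096 usec * (1 << qp->timeout), so qp->timeout == 14
 * gives roughly 4.096 * 16384 usec, i.e. about 67 ms, before
 * rvt_rc_timeout() fires (plus rdi->busy_jiffies when many RC QPs are
 * allocated).
 */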
2065
2066/**
2067 * rvt_add_rnr_timer - add/start an rnr timer
2068 * @qp: the QP
2069 * @aeth: aeth of RNR timeout, simulated aeth for loopback
2070 * add an rnr timer on the QP
2071 */
2072void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
2073{
2074        u32 to;
2075
2076        lockdep_assert_held(&qp->s_lock);
2077        qp->s_flags |= RVT_S_WAIT_RNR;
2078        to = rvt_aeth_to_usec(aeth);
2079        hrtimer_start(&qp->s_rnr_timer,
2080                      ns_to_ktime(1000 * to), HRTIMER_MODE_REL);
2081}
2082EXPORT_SYMBOL(rvt_add_rnr_timer);
2083
2084/**
2085 * rvt_stop_rc_timers - stop all timers
2086 * @qp: the QP
2087 * stop any pending timers
2088 */
2089void rvt_stop_rc_timers(struct rvt_qp *qp)
2090{
2091        lockdep_assert_held(&qp->s_lock);
2092        /* Remove QP from all timers */
2093        if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
2094                qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
2095                del_timer(&qp->s_timer);
2096                hrtimer_try_to_cancel(&qp->s_rnr_timer);
2097        }
2098}
2099EXPORT_SYMBOL(rvt_stop_rc_timers);
2100
2101/**
2102 * rvt_stop_rnr_timer - stop an rnr timer
2103 * @qp: the QP
2104 *
2105 * stop an rnr timer and return whether the timer
2106 * was pending.
2107 */
2108static int rvt_stop_rnr_timer(struct rvt_qp *qp)
2109{
2110        int rval = 0;
2111
2112        lockdep_assert_held(&qp->s_lock);
2113        /* Remove QP from rnr timer */
2114        if (qp->s_flags & RVT_S_WAIT_RNR) {
2115                qp->s_flags &= ~RVT_S_WAIT_RNR;
2116                rval = hrtimer_try_to_cancel(&qp->s_rnr_timer);
2117        }
2118        return rval;
2119}
2120
2121/**
2122 * rvt_del_timers_sync - wait for any timeout routines to exit
2123 * @qp: the QP
2124 */
2125void rvt_del_timers_sync(struct rvt_qp *qp)
2126{
2127        del_timer_sync(&qp->s_timer);
2128        hrtimer_cancel(&qp->s_rnr_timer);
2129}
2130EXPORT_SYMBOL(rvt_del_timers_sync);
2131
2132/*
2133 * This is called from s_timer for missing responses.
2134 */
2135static void rvt_rc_timeout(unsigned long arg)
2136{
2137        struct rvt_qp *qp = (struct rvt_qp *)arg;
2138        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2139        unsigned long flags;
2140
2141        spin_lock_irqsave(&qp->r_lock, flags);
2142        spin_lock(&qp->s_lock);
2143        if (qp->s_flags & RVT_S_TIMER) {
2144                struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
2145
2146                qp->s_flags &= ~RVT_S_TIMER;
2147                rvp->n_rc_timeouts++;
2148                del_timer(&qp->s_timer);
2149                trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
2150                if (rdi->driver_f.notify_restart_rc)
2151                        rdi->driver_f.notify_restart_rc(qp,
2152                                                        qp->s_last_psn + 1,
2153                                                        1);
2154                rdi->driver_f.schedule_send(qp);
2155        }
2156        spin_unlock(&qp->s_lock);
2157        spin_unlock_irqrestore(&qp->r_lock, flags);
2158}
2159
2160/*
2161 * This is called from s_timer for RNR timeouts.
2162 */
2163enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t)
2164{
2165        struct rvt_qp *qp = container_of(t, struct rvt_qp, s_rnr_timer);
2166        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2167        unsigned long flags;
2168
2169        spin_lock_irqsave(&qp->s_lock, flags);
2170        rvt_stop_rnr_timer(qp);
2171        rdi->driver_f.schedule_send(qp);
2172        spin_unlock_irqrestore(&qp->s_lock, flags);
2173        return HRTIMER_NORESTART;
2174}
2175EXPORT_SYMBOL(rvt_rc_rnr_retry);
2176
2177/**
2178 * rvt_qp_iter_init - initialize an iterator for QP iteration
2179 * @rdi: rvt devinfo
2180 * @v: u64 value passed to the callback
2181 *
2182 * This returns an iterator suitable for iterating QPs
2183 * in the system.
2184 *
2185 * @cb is a user-defined callback and @v is a 64 bit
2186 * value passed to, and used by, the callback.  An
2187 * example use case would be to alter QP processing
2188 * based on criteria not part of the rvt_qp.
2189 *
2190 * Use cases that require memory allocation to succeed
2191 * must preallocate appropriately.
2192 *
2193 * Return: a pointer to an rvt_qp_iter or NULL
2194 */
2195struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
2196                                     u64 v,
2197                                     void (*cb)(struct rvt_qp *qp, u64 v))
2198{
2199        struct rvt_qp_iter *i;
2200
2201        i = kzalloc(sizeof(*i), GFP_KERNEL);
2202        if (!i)
2203                return NULL;
2204
2205        i->rdi = rdi;
2206        /* number of special QPs (SMI/GSI) for device */
2207        i->specials = rdi->ibdev.phys_port_cnt * 2;
2208        i->v = v;
2209        i->cb = cb;
2210
2211        return i;
2212}
2213EXPORT_SYMBOL(rvt_qp_iter_init);
2214
2215/**
2216 * rvt_qp_iter_next - return the next QP in iter
2217 * @iter: the iterator
2218 *
2219 * Fine grained QP iterator suitable for use
2220 * with debugfs seq_file mechanisms.
2221 *
2222 * Updates iter->qp with the current QP when the return
2223 * value is 0.
2224 *
2225 * Return: 0 - iter->qp is valid, 1 - no more QPs
2226 */
2227int rvt_qp_iter_next(struct rvt_qp_iter *iter)
2228        __must_hold(RCU)
2229{
2230        int n = iter->n;
2231        int ret = 1;
2232        struct rvt_qp *pqp = iter->qp;
2233        struct rvt_qp *qp;
2234        struct rvt_dev_info *rdi = iter->rdi;
2235
2236        /*
2237         * The approach is to consider the special qps
2238         * as additional table entries before the
2239         * real hash table.  Since the qp code sets
2240         * the qp->next hash link to NULL, this works just fine.
2241         *
2242         * iter->specials is 2 * # ports
2243         *
2244         * n = 0..iter->specials is the special qp indices
2245         *
2246         * n = iter->specials..rdi->qp_dev->qp_table_size+iter->specials are
2247         * the potential hash bucket entries
2248         *
2249         */
2250        for (; n < rdi->qp_dev->qp_table_size + iter->specials; n++) {
2251                if (pqp) {
2252                        qp = rcu_dereference(pqp->next);
2253                } else {
2254                        if (n < iter->specials) {
2255                                struct rvt_ibport *rvp;
2256                                int pidx;
2257
2258                                pidx = n % rdi->ibdev.phys_port_cnt;
2259                                rvp = rdi->ports[pidx];
2260                                qp = rcu_dereference(rvp->qp[n & 1]);
2261                        } else {
2262                                qp = rcu_dereference(
2263                                        rdi->qp_dev->qp_table[
2264                                                (n - iter->specials)]);
2265                        }
2266                }
2267                pqp = qp;
2268                if (qp) {
2269                        iter->qp = qp;
2270                        iter->n = n;
2271                        return 0;
2272                }
2273        }
2274        return ret;
2275}
2276EXPORT_SYMBOL(rvt_qp_iter_next);
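/*
 * Illustrative sketch (not part of this file): a debugfs seq_file style
 * walk might use the iterator like this, holding RCU across each call;
 * error handling is omitted and the callback argument is unused here.
 *
 *      struct rvt_qp_iter *iter = rvt_qp_iter_init(rdi, 0, NULL);
 *
 *      rcu_read_lock();
 *      while (iter && !rvt_qp_iter_next(iter))
 *              pr_info("qp %u\n", iter->qp->ibqp.qp_num);
 *      rcu_read_unlock();
 *      kfree(iter);
 */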
2277
2278/**
2279 * rvt_qp_iter - iterate all QPs
2280 * @rdi: rvt devinfo
2281 * @v: a 64 bit value
2282 * @cb: a callback
2283 *
2284 * This provides a way for iterating all QPs.
2285 *
2286 * @cb is a user-defined callback and @v is a 64 bit
2287 * value passed to, and used by, the callback.  An
2288 * example use case would be to alter QP processing
2289 * based on criteria not part of the rvt_qp.
2290 *
2291 * The code has an internal iterator to simplify
2292 * non seq_file use cases.
2293 */
2294void rvt_qp_iter(struct rvt_dev_info *rdi,
2295                 u64 v,
2296                 void (*cb)(struct rvt_qp *qp, u64 v))
2297{
2298        int ret;
2299        struct rvt_qp_iter i = {
2300                .rdi = rdi,
2301                .specials = rdi->ibdev.phys_port_cnt * 2,
2302                .v = v,
2303                .cb = cb
2304        };
2305
2306        rcu_read_lock();
2307        do {
2308                ret = rvt_qp_iter_next(&i);
2309                if (!ret) {
2310                        rvt_get_qp(i.qp);
2311                        rcu_read_unlock();
2312                        i.cb(i.qp, i.v);
2313                        rcu_read_lock();
2314                        rvt_put_qp(i.qp);
2315                }
2316        } while (!ret);
2317        rcu_read_unlock();
2318}
2319EXPORT_SYMBOL(rvt_qp_iter);
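/*
 * Illustrative sketch (not part of this file): a driver could inspect
 * every QP with a callback of its own; the callback name below is
 * hypothetical.
 *
 *      static void my_qp_dump(struct rvt_qp *qp, u64 v)
 *      {
 *              pr_info("qp %u in state %d\n", qp->ibqp.qp_num, qp->state);
 *      }
 *
 *      rvt_qp_iter(rdi, 0, my_qp_dump);
 */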
2320