linux/drivers/staging/rdma/hfi1/qp.c
/*
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/seq_file.h>

#include "hfi.h"
#include "qp.h"
#include "trace.h"
#include "sdma.h"

#define BITS_PER_PAGE           (PAGE_SIZE*BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)

static unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

static void flush_tx_list(struct hfi1_qp *qp);
static int iowait_sleep(
        struct sdma_engine *sde,
        struct iowait *wait,
        struct sdma_txreq *stx,
        unsigned seq);
static void iowait_wakeup(struct iowait *wait, int reason);

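/*
 * Compose a QPN from a bitmap page and a bit offset within it; the
 * inverse of the map/offset split done in alloc_qpn() below.  Each
 * qpn_map page covers BITS_PER_PAGE QPNs, so with 4 KiB pages
 * (BITS_PER_PAGE == 32768), for example, QPN 40000 lives in
 * qpt->map[1] at bit offset 7232.
 */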
static inline unsigned mk_qpn(struct hfi1_qpn_table *qpt,
                              struct qpn_map *map, unsigned off)
{
        return (map - qpt->map) * BITS_PER_PAGE + off;
}

/*
 * Convert the AETH credit code into the number of credits.
 */
static const u16 credit_table[31] = {
        0,                      /* 0 */
        1,                      /* 1 */
        2,                      /* 2 */
        3,                      /* 3 */
        4,                      /* 4 */
        6,                      /* 5 */
        8,                      /* 6 */
        12,                     /* 7 */
        16,                     /* 8 */
        24,                     /* 9 */
        32,                     /* A */
        48,                     /* B */
        64,                     /* C */
        96,                     /* D */
        128,                    /* E */
        192,                    /* F */
        256,                    /* 10 */
        384,                    /* 11 */
        512,                    /* 12 */
        768,                    /* 13 */
        1024,                   /* 14 */
        1536,                   /* 15 */
        2048,                   /* 16 */
        3072,                   /* 17 */
        4096,                   /* 18 */
        6144,                   /* 19 */
        8192,                   /* 1A */
        12288,                  /* 1B */
        16384,                  /* 1C */
        24576,                  /* 1D */
        32768                   /* 1E */
};
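
/*
 * Note the shape of the table above: entries alternate between powers
 * of two and 1.5x values, so the advertised credit roughly doubles
 * every two codes.  Only 31 entries are needed because credit code
 * 0x1F (HFI1_AETH_CREDIT_INVAL) is reserved to mean "invalid", i.e.
 * no credit limit; see hfi1_compute_aeth() and hfi1_get_credit().
 */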

static void get_map_page(struct hfi1_qpn_table *qpt, struct qpn_map *map)
{
        unsigned long page = get_zeroed_page(GFP_KERNEL);

        /*
         * Free the page if someone raced with us installing it.
         */

        spin_lock(&qpt->lock);
        if (map->page)
                free_page(page);
        else
                map->page = (void *)page;
        spin_unlock(&qpt->lock);
}

/*
 * Allocate the next available QPN or
 * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
 */
static int alloc_qpn(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt,
                     enum ib_qp_type type, u8 port)
{
        u32 i, offset, max_scan, qpn;
        struct qpn_map *map;
        int ret;

        if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
                unsigned n;

                ret = type == IB_QPT_GSI;
                n = 1 << (ret + 2 * (port - 1));
                spin_lock(&qpt->lock);
                if (qpt->flags & n)
                        ret = -EINVAL;
                else
                        qpt->flags |= n;
                spin_unlock(&qpt->lock);
                goto bail;
        }

        qpn = qpt->last + qpt->incr;
        if (qpn >= QPN_MAX)
                qpn = qpt->incr | ((qpt->last & 1) ^ 1);
        /* offset carries bit 0 */
        offset = qpn & BITS_PER_PAGE_MASK;
        map = &qpt->map[qpn / BITS_PER_PAGE];
        max_scan = qpt->nmaps - !offset;
        for (i = 0;;) {
                if (unlikely(!map->page)) {
                        get_map_page(qpt, map);
                        if (unlikely(!map->page))
                                break;
                }
                do {
                        if (!test_and_set_bit(offset, map->page)) {
                                qpt->last = qpn;
                                ret = qpn;
                                goto bail;
                        }
                        offset += qpt->incr;
                        /*
                         * This qpn might be bogus if offset >= BITS_PER_PAGE.
                         * That is OK.  It gets re-assigned below.
                         */
                        qpn = mk_qpn(qpt, map, offset);
                } while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
                /*
                 * In order to keep the number of pages allocated to a
                 * minimum, we scan all the existing pages before increasing
                 * the size of the bitmap table.
                 */
                if (++i > max_scan) {
                        if (qpt->nmaps == QPNMAP_ENTRIES)
                                break;
                        map = &qpt->map[qpt->nmaps++];
                        /* start at incr with current bit 0 */
                        offset = qpt->incr | (offset & 1);
                } else if (map < &qpt->map[qpt->nmaps]) {
                        ++map;
                        /* start at incr with current bit 0 */
                        offset = qpt->incr | (offset & 1);
                } else {
                        map = &qpt->map[0];
                        /* wrap to first map page, invert bit 0 */
                        offset = qpt->incr | ((offset & 1) ^ 1);
                }
                /* there can be no bits at shift and below */
                WARN_ON(offset & (dd->qos_shift - 1));
                qpn = mk_qpn(qpt, map, offset);
        }

        ret = -ENOMEM;

bail:
        return ret;
}
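
/*
 * A worked example of the allocator above: with dd->qos_shift == 2,
 * qpt->incr is 4, so the scan hands out the even series 4, 8, 12, ...
 * and, once that space wraps, the odd series 5, 9, 13, ...  Bit 0 is
 * carried along so that both series are reachable while the bits
 * between bit 0 and qos_shift stay clear for QOS use.
 */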

static void free_qpn(struct hfi1_qpn_table *qpt, u32 qpn)
{
        struct qpn_map *map;

        map = qpt->map + qpn / BITS_PER_PAGE;
        if (map->page)
                clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
}

/*
 * Put the QP into the hash table.
 * The hash table holds a reference to the QP.
 */
static void insert_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
{
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        unsigned long flags;

        atomic_inc(&qp->refcount);
        spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);

        if (qp->ibqp.qp_num <= 1) {
                rcu_assign_pointer(ibp->qp[qp->ibqp.qp_num], qp);
        } else {
                u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);

                qp->next = dev->qp_dev->qp_table[n];
                rcu_assign_pointer(dev->qp_dev->qp_table[n], qp);
                trace_hfi1_qpinsert(qp, n);
        }

        spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
}

/*
 * Remove the QP from the table so it can't be found asynchronously by
 * the receive interrupt routine.
 */
static void remove_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
{
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);
        unsigned long flags;
        int removed = 1;

        spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);

        if (rcu_dereference_protected(ibp->qp[0],
                        lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
                RCU_INIT_POINTER(ibp->qp[0], NULL);
        } else if (rcu_dereference_protected(ibp->qp[1],
                        lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
                RCU_INIT_POINTER(ibp->qp[1], NULL);
        } else {
                struct hfi1_qp *q;
                struct hfi1_qp __rcu **qpp;

                removed = 0;
                qpp = &dev->qp_dev->qp_table[n];
                for (; (q = rcu_dereference_protected(*qpp,
                                lockdep_is_held(&dev->qp_dev->qpt_lock)))
                                        != NULL;
                                qpp = &q->next)
                        if (q == qp) {
                                RCU_INIT_POINTER(*qpp,
                                 rcu_dereference_protected(qp->next,
                                 lockdep_is_held(&dev->qp_dev->qpt_lock)));
                                removed = 1;
                                trace_hfi1_qpremove(qp, n);
                                break;
                        }
        }

        spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
        if (removed) {
                synchronize_rcu();
                if (atomic_dec_and_test(&qp->refcount))
                        wake_up(&qp->wait);
        }
}
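
/*
 * The qpt_lock above only serializes writers; readers walk the hash
 * chains under rcu_read_lock().  A minimal sketch of the reader side
 * this pairs with (the real lookup lives in the verbs code):
 *
 *        rcu_read_lock();
 *        for (qp = rcu_dereference(dev->qp_dev->qp_table[n]); qp;
 *             qp = rcu_dereference(qp->next))
 *                if (qp->ibqp.qp_num == qpn)
 *                        break;
 *        rcu_read_unlock();
 *
 * This is why remove_qp() calls synchronize_rcu() before allowing the
 * reference count to drop: no reader can still hold a stale pointer.
 */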

/**
 * free_all_qps - check for QPs still in use
 * @dd: the device data
 *
 * There should not be any QPs still in use.
 * Returns the number of QPs still in use.
 */
static unsigned free_all_qps(struct hfi1_devdata *dd)
{
        struct hfi1_ibdev *dev = &dd->verbs_dev;
        unsigned long flags;
        struct hfi1_qp *qp;
        unsigned n, qp_inuse = 0;

        for (n = 0; n < dd->num_pports; n++) {
                struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;

                if (!hfi1_mcast_tree_empty(ibp))
                        qp_inuse++;
                rcu_read_lock();
                if (rcu_dereference(ibp->qp[0]))
                        qp_inuse++;
                if (rcu_dereference(ibp->qp[1]))
                        qp_inuse++;
                rcu_read_unlock();
        }

        if (!dev->qp_dev)
                goto bail;
        spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
        for (n = 0; n < dev->qp_dev->qp_table_size; n++) {
                qp = rcu_dereference_protected(dev->qp_dev->qp_table[n],
                        lockdep_is_held(&dev->qp_dev->qpt_lock));
                RCU_INIT_POINTER(dev->qp_dev->qp_table[n], NULL);

                for (; qp; qp = rcu_dereference_protected(qp->next,
                                lockdep_is_held(&dev->qp_dev->qpt_lock)))
                        qp_inuse++;
        }
        spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
        synchronize_rcu();
bail:
        return qp_inuse;
}

/**
 * reset_qp - initialize the QP state to the reset state
 * @qp: the QP to reset
 * @type: the QP type
 */
static void reset_qp(struct hfi1_qp *qp, enum ib_qp_type type)
{
        qp->remote_qpn = 0;
        qp->qkey = 0;
        qp->qp_access_flags = 0;
        iowait_init(
                &qp->s_iowait,
                1,
                hfi1_do_send,
                iowait_sleep,
                iowait_wakeup);
        qp->s_flags &= HFI1_S_SIGNAL_REQ_WR;
        qp->s_hdrwords = 0;
        qp->s_wqe = NULL;
        qp->s_draining = 0;
        qp->s_next_psn = 0;
        qp->s_last_psn = 0;
        qp->s_sending_psn = 0;
        qp->s_sending_hpsn = 0;
        qp->s_psn = 0;
        qp->r_psn = 0;
        qp->r_msn = 0;
        if (type == IB_QPT_RC) {
                qp->s_state = IB_OPCODE_RC_SEND_LAST;
                qp->r_state = IB_OPCODE_RC_SEND_LAST;
        } else {
                qp->s_state = IB_OPCODE_UC_SEND_LAST;
                qp->r_state = IB_OPCODE_UC_SEND_LAST;
        }
        qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
        qp->r_nak_state = 0;
        qp->r_aflags = 0;
        qp->r_flags = 0;
        qp->s_head = 0;
        qp->s_tail = 0;
        qp->s_cur = 0;
        qp->s_acked = 0;
        qp->s_last = 0;
        qp->s_ssn = 1;
        qp->s_lsn = 0;
        clear_ahg(qp);
        qp->s_mig_state = IB_MIG_MIGRATED;
        memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
        qp->r_head_ack_queue = 0;
        qp->s_tail_ack_queue = 0;
        qp->s_num_rd_atomic = 0;
        if (qp->r_rq.wq) {
                qp->r_rq.wq->head = 0;
                qp->r_rq.wq->tail = 0;
        }
        qp->r_sge.num_sge = 0;
}

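/**
 * clear_mr_refs - drop the MR references held by a QP
 * @qp: the QP whose references should be dropped
 * @clr_sends: if non-zero, also release references held by send WQEs
 *
 * Releases the receive-side SGE references and, for RC QPs, any MR
 * references still held by the responder's ACK queue entries.
 */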
static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends)
{
        unsigned n;

        if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
                hfi1_put_ss(&qp->s_rdma_read_sge);

        hfi1_put_ss(&qp->r_sge);

        if (clr_sends) {
                while (qp->s_last != qp->s_head) {
                        struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
                        unsigned i;

                        for (i = 0; i < wqe->wr.num_sge; i++) {
                                struct hfi1_sge *sge = &wqe->sg_list[i];

                                hfi1_put_mr(sge->mr);
                        }
                        if (qp->ibqp.qp_type == IB_QPT_UD ||
                            qp->ibqp.qp_type == IB_QPT_SMI ||
                            qp->ibqp.qp_type == IB_QPT_GSI)
                                atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
                        if (++qp->s_last >= qp->s_size)
                                qp->s_last = 0;
                }
                if (qp->s_rdma_mr) {
                        hfi1_put_mr(qp->s_rdma_mr);
                        qp->s_rdma_mr = NULL;
                }
        }

        if (qp->ibqp.qp_type != IB_QPT_RC)
                return;

        for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
                struct hfi1_ack_entry *e = &qp->s_ack_queue[n];

                if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
                    e->rdma_sge.mr) {
                        hfi1_put_mr(e->rdma_sge.mr);
                        e->rdma_sge.mr = NULL;
                }
        }
}

/**
 * hfi1_error_qp - put a QP into the error state
 * @qp: the QP to put into the error state
 * @err: the receive completion error to signal if a RWQE is active
 *
 * Flushes both send and receive work queues.
 * Returns true if last WQE event should be generated.
 * The QP r_lock and s_lock should be held and interrupts disabled.
 * If we are already in error state, just return.
 */
int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err)
{
        struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
        struct ib_wc wc;
        int ret = 0;

        if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
                goto bail;

        qp->state = IB_QPS_ERR;

        if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
                qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
                del_timer(&qp->s_timer);
        }

        if (qp->s_flags & HFI1_S_ANY_WAIT_SEND)
                qp->s_flags &= ~HFI1_S_ANY_WAIT_SEND;

        write_seqlock(&dev->iowait_lock);
        if (!list_empty(&qp->s_iowait.list) && !(qp->s_flags & HFI1_S_BUSY)) {
                qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
                list_del_init(&qp->s_iowait.list);
                if (atomic_dec_and_test(&qp->refcount))
                        wake_up(&qp->wait);
        }
        write_sequnlock(&dev->iowait_lock);

        if (!(qp->s_flags & HFI1_S_BUSY)) {
                qp->s_hdrwords = 0;
                if (qp->s_rdma_mr) {
                        hfi1_put_mr(qp->s_rdma_mr);
                        qp->s_rdma_mr = NULL;
                }
                flush_tx_list(qp);
        }

        /* Schedule the sending tasklet to drain the send work queue. */
        if (qp->s_last != qp->s_head)
                hfi1_schedule_send(qp);

        clear_mr_refs(qp, 0);

        memset(&wc, 0, sizeof(wc));
        wc.qp = &qp->ibqp;
        wc.opcode = IB_WC_RECV;

        if (test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) {
                wc.wr_id = qp->r_wr_id;
                wc.status = err;
                hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
        }
        wc.status = IB_WC_WR_FLUSH_ERR;

        if (qp->r_rq.wq) {
                struct hfi1_rwq *wq;
                u32 head;
                u32 tail;

                spin_lock(&qp->r_rq.lock);

                /* sanity check pointers before trusting them */
                wq = qp->r_rq.wq;
                head = wq->head;
                if (head >= qp->r_rq.size)
                        head = 0;
                tail = wq->tail;
                if (tail >= qp->r_rq.size)
                        tail = 0;
                while (tail != head) {
                        wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
                        if (++tail >= qp->r_rq.size)
                                tail = 0;
                        hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
                }
                wq->tail = tail;

                spin_unlock(&qp->r_rq.lock);
        } else if (qp->ibqp.event_handler)
                ret = 1;

bail:
        return ret;
}

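/**
 * flush_tx_list - release all tx requests queued on the QP's iowait
 * @qp: the QP to flush
 */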
static void flush_tx_list(struct hfi1_qp *qp)
{
        while (!list_empty(&qp->s_iowait.tx_head)) {
                struct sdma_txreq *tx;

                tx = list_first_entry(
                        &qp->s_iowait.tx_head,
                        struct sdma_txreq,
                        list);
                list_del_init(&tx->list);
                hfi1_put_txreq(
                        container_of(tx, struct verbs_txreq, txreq));
        }
}

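/**
 * flush_iowait - take the QP off the iowait list, dropping its reference
 * @qp: the QP to remove
 */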
static void flush_iowait(struct hfi1_qp *qp)
{
        struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
        unsigned long flags;

        write_seqlock_irqsave(&dev->iowait_lock, flags);
        if (!list_empty(&qp->s_iowait.list)) {
                list_del_init(&qp->s_iowait.list);
                if (atomic_dec_and_test(&qp->refcount))
                        wake_up(&qp->wait);
        }
        write_sequnlock_irqrestore(&dev->iowait_lock, flags);
}

static inline int opa_mtu_enum_to_int(int mtu)
{
        switch (mtu) {
        case OPA_MTU_8192:  return 8192;
        case OPA_MTU_10240: return 10240;
        default:            return -1;
        }
}

/**
 * This function is what we would push to the core layer if we wanted to be a
 * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 *
 * The actual flag used to determine "8k MTU" will change and is currently
 * unknown.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
        int val = opa_mtu_enum_to_int((int)mtu);

        if (val > 0)
                return val;
        return ib_mtu_enum_to_int(mtu);
}
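
/*
 * The resulting mapping, for reference: the standard IB enums
 * IB_MTU_256 through IB_MTU_4096 resolve through ib_mtu_enum_to_int()
 * as usual, while the OPA extensions OPA_MTU_8192 and OPA_MTU_10240
 * (values beyond the IB enum range) resolve to 8192 and 10240 bytes.
 */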

/**
 * hfi1_modify_qp - modify the attributes of a queue pair
 * @ibqp: the queue pair whose attributes we're modifying
 * @attr: the new attributes
 * @attr_mask: the mask of attributes to modify
 * @udata: user data for libibverbs.so
 *
 * Returns 0 on success, otherwise returns an errno.
 */
int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                   int attr_mask, struct ib_udata *udata)
{
        struct hfi1_ibdev *dev = to_idev(ibqp->device);
        struct hfi1_qp *qp = to_iqp(ibqp);
        enum ib_qp_state cur_state, new_state;
        struct ib_event ev;
        int lastwqe = 0;
        int mig = 0;
        int ret;
        u32 pmtu = 0; /* for gcc warning only */
        struct hfi1_devdata *dd;

        spin_lock_irq(&qp->r_lock);
        spin_lock(&qp->s_lock);

        cur_state = attr_mask & IB_QP_CUR_STATE ?
                attr->cur_qp_state : qp->state;
        new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

        if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
                                attr_mask, IB_LINK_LAYER_UNSPECIFIED))
                goto inval;

        if (attr_mask & IB_QP_AV) {
                if (attr->ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
                        goto inval;
                if (hfi1_check_ah(qp->ibqp.device, &attr->ah_attr))
                        goto inval;
        }

        if (attr_mask & IB_QP_ALT_PATH) {
                if (attr->alt_ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
                        goto inval;
                if (hfi1_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
                        goto inval;
                if (attr->alt_pkey_index >= hfi1_get_npkeys(dd_from_dev(dev)))
                        goto inval;
        }

        if (attr_mask & IB_QP_PKEY_INDEX)
                if (attr->pkey_index >= hfi1_get_npkeys(dd_from_dev(dev)))
                        goto inval;

        if (attr_mask & IB_QP_MIN_RNR_TIMER)
                if (attr->min_rnr_timer > 31)
                        goto inval;

        if (attr_mask & IB_QP_PORT)
                if (qp->ibqp.qp_type == IB_QPT_SMI ||
                    qp->ibqp.qp_type == IB_QPT_GSI ||
                    attr->port_num == 0 ||
                    attr->port_num > ibqp->device->phys_port_cnt)
                        goto inval;

        if (attr_mask & IB_QP_DEST_QPN)
                if (attr->dest_qp_num > HFI1_QPN_MASK)
                        goto inval;

        if (attr_mask & IB_QP_RETRY_CNT)
                if (attr->retry_cnt > 7)
                        goto inval;

        if (attr_mask & IB_QP_RNR_RETRY)
                if (attr->rnr_retry > 7)
                        goto inval;

        /*
         * Don't allow invalid path_mtu values.  It is OK to set it
         * greater than the active mtu (or even the max_cap, if we
         * have tuned that to a small mtu).  We'll set qp->path_mtu
         * to the lesser of the requested attribute mtu and the
         * active mtu, for packetizing messages.
         * Note that the QP port has to be set in INIT and MTU in RTR.
         */
        if (attr_mask & IB_QP_PATH_MTU) {
                int mtu, pidx = qp->port_num - 1;

                dd = dd_from_dev(dev);
                mtu = verbs_mtu_enum_to_int(ibqp->device, attr->path_mtu);
                if (mtu == -1)
                        goto inval;

                if (mtu > dd->pport[pidx].ibmtu)
                        pmtu = mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
                else
                        pmtu = attr->path_mtu;
        }

        if (attr_mask & IB_QP_PATH_MIG_STATE) {
                if (attr->path_mig_state == IB_MIG_REARM) {
                        if (qp->s_mig_state == IB_MIG_ARMED)
                                goto inval;
                        if (new_state != IB_QPS_RTS)
                                goto inval;
                } else if (attr->path_mig_state == IB_MIG_MIGRATED) {
                        if (qp->s_mig_state == IB_MIG_REARM)
                                goto inval;
                        if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
                                goto inval;
                        if (qp->s_mig_state == IB_MIG_ARMED)
                                mig = 1;
                } else
                        goto inval;
        }

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                if (attr->max_dest_rd_atomic > HFI1_MAX_RDMA_ATOMIC)
                        goto inval;

        switch (new_state) {
        case IB_QPS_RESET:
                if (qp->state != IB_QPS_RESET) {
                        qp->state = IB_QPS_RESET;
                        flush_iowait(qp);
                        qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
                        spin_unlock(&qp->s_lock);
                        spin_unlock_irq(&qp->r_lock);
                        /* Stop the sending work queue and retry timer */
                        cancel_work_sync(&qp->s_iowait.iowork);
                        del_timer_sync(&qp->s_timer);
                        iowait_sdma_drain(&qp->s_iowait);
                        flush_tx_list(qp);
                        remove_qp(dev, qp);
                        wait_event(qp->wait, !atomic_read(&qp->refcount));
                        spin_lock_irq(&qp->r_lock);
                        spin_lock(&qp->s_lock);
                        clear_mr_refs(qp, 1);
                        clear_ahg(qp);
                        reset_qp(qp, ibqp->qp_type);
                }
                break;

        case IB_QPS_RTR:
                /* Allow event to re-trigger if QP set to RTR more than once */
                qp->r_flags &= ~HFI1_R_COMM_EST;
                qp->state = new_state;
                break;

        case IB_QPS_SQD:
                qp->s_draining = qp->s_last != qp->s_cur;
                qp->state = new_state;
                break;

        case IB_QPS_SQE:
                if (qp->ibqp.qp_type == IB_QPT_RC)
                        goto inval;
                qp->state = new_state;
                break;

        case IB_QPS_ERR:
                lastwqe = hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
                break;

        default:
                qp->state = new_state;
                break;
        }

        if (attr_mask & IB_QP_PKEY_INDEX)
                qp->s_pkey_index = attr->pkey_index;

        if (attr_mask & IB_QP_PORT)
                qp->port_num = attr->port_num;

        if (attr_mask & IB_QP_DEST_QPN)
                qp->remote_qpn = attr->dest_qp_num;

        if (attr_mask & IB_QP_SQ_PSN) {
                qp->s_next_psn = attr->sq_psn & PSN_MODIFY_MASK;
                qp->s_psn = qp->s_next_psn;
                qp->s_sending_psn = qp->s_next_psn;
                qp->s_last_psn = qp->s_next_psn - 1;
                qp->s_sending_hpsn = qp->s_last_psn;
        }

        if (attr_mask & IB_QP_RQ_PSN)
                qp->r_psn = attr->rq_psn & PSN_MODIFY_MASK;

        if (attr_mask & IB_QP_ACCESS_FLAGS)
                qp->qp_access_flags = attr->qp_access_flags;

        if (attr_mask & IB_QP_AV) {
                qp->remote_ah_attr = attr->ah_attr;
                qp->s_srate = attr->ah_attr.static_rate;
                qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
        }

        if (attr_mask & IB_QP_ALT_PATH) {
                qp->alt_ah_attr = attr->alt_ah_attr;
                qp->s_alt_pkey_index = attr->alt_pkey_index;
        }

        if (attr_mask & IB_QP_PATH_MIG_STATE) {
                qp->s_mig_state = attr->path_mig_state;
                if (mig) {
                        qp->remote_ah_attr = qp->alt_ah_attr;
                        qp->port_num = qp->alt_ah_attr.port_num;
                        qp->s_pkey_index = qp->s_alt_pkey_index;
                        qp->s_flags |= HFI1_S_AHG_CLEAR;
                }
        }

        if (attr_mask & IB_QP_PATH_MTU) {
                struct hfi1_ibport *ibp;
                u8 sc, vl;
                u32 mtu;

                dd = dd_from_dev(dev);
                ibp = &dd->pport[qp->port_num - 1].ibport_data;

                sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
                vl = sc_to_vlt(dd, sc);

                mtu = verbs_mtu_enum_to_int(ibqp->device, pmtu);
                if (vl < PER_VL_SEND_CONTEXTS)
                        mtu = min_t(u32, mtu, dd->vld[vl].mtu);
                pmtu = mtu_to_enum(mtu, OPA_MTU_8192);

                qp->path_mtu = pmtu;
                qp->pmtu = mtu;
        }

        if (attr_mask & IB_QP_RETRY_CNT) {
                qp->s_retry_cnt = attr->retry_cnt;
                qp->s_retry = attr->retry_cnt;
        }

        if (attr_mask & IB_QP_RNR_RETRY) {
                qp->s_rnr_retry_cnt = attr->rnr_retry;
                qp->s_rnr_retry = attr->rnr_retry;
        }

        if (attr_mask & IB_QP_MIN_RNR_TIMER)
                qp->r_min_rnr_timer = attr->min_rnr_timer;

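        /*
         * The local ACK timeout below follows the IB formula of
         * 4.096 usec * 2^timeout; e.g. attr->timeout == 14 yields
         * roughly 67 msec, which is then converted to jiffies.
         */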
        if (attr_mask & IB_QP_TIMEOUT) {
                qp->timeout = attr->timeout;
                qp->timeout_jiffies =
                        usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
                                1000UL);
        }

        if (attr_mask & IB_QP_QKEY)
                qp->qkey = attr->qkey;

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                qp->r_max_rd_atomic = attr->max_dest_rd_atomic;

        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
                qp->s_max_rd_atomic = attr->max_rd_atomic;

        spin_unlock(&qp->s_lock);
        spin_unlock_irq(&qp->r_lock);

        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
                insert_qp(dev, qp);

        if (lastwqe) {
                ev.device = qp->ibqp.device;
                ev.element.qp = &qp->ibqp;
                ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
        }
        if (mig) {
                ev.device = qp->ibqp.device;
                ev.element.qp = &qp->ibqp;
                ev.event = IB_EVENT_PATH_MIG;
                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
        }
        ret = 0;
        goto bail;

inval:
        spin_unlock(&qp->s_lock);
        spin_unlock_irq(&qp->r_lock);
        ret = -EINVAL;

bail:
        return ret;
}

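/**
 * hfi1_query_qp - report the attributes of a queue pair
 * @ibqp: the queue pair to query
 * @attr: filled in with the current QP attributes
 * @attr_mask: ignored; all attributes are reported
 * @init_attr: filled in with the QP creation attributes
 *
 * Returns 0.
 */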
int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                  int attr_mask, struct ib_qp_init_attr *init_attr)
{
        struct hfi1_qp *qp = to_iqp(ibqp);

        attr->qp_state = qp->state;
        attr->cur_qp_state = attr->qp_state;
        attr->path_mtu = qp->path_mtu;
        attr->path_mig_state = qp->s_mig_state;
        attr->qkey = qp->qkey;
        attr->rq_psn = mask_psn(qp->r_psn);
        attr->sq_psn = mask_psn(qp->s_next_psn);
        attr->dest_qp_num = qp->remote_qpn;
        attr->qp_access_flags = qp->qp_access_flags;
        attr->cap.max_send_wr = qp->s_size - 1;
        attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
        attr->cap.max_send_sge = qp->s_max_sge;
        attr->cap.max_recv_sge = qp->r_rq.max_sge;
        attr->cap.max_inline_data = 0;
        attr->ah_attr = qp->remote_ah_attr;
        attr->alt_ah_attr = qp->alt_ah_attr;
        attr->pkey_index = qp->s_pkey_index;
        attr->alt_pkey_index = qp->s_alt_pkey_index;
        attr->en_sqd_async_notify = 0;
        attr->sq_draining = qp->s_draining;
        attr->max_rd_atomic = qp->s_max_rd_atomic;
        attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
        attr->min_rnr_timer = qp->r_min_rnr_timer;
        attr->port_num = qp->port_num;
        attr->timeout = qp->timeout;
        attr->retry_cnt = qp->s_retry_cnt;
        attr->rnr_retry = qp->s_rnr_retry_cnt;
        attr->alt_port_num = qp->alt_ah_attr.port_num;
        attr->alt_timeout = qp->alt_timeout;

        init_attr->event_handler = qp->ibqp.event_handler;
        init_attr->qp_context = qp->ibqp.qp_context;
        init_attr->send_cq = qp->ibqp.send_cq;
        init_attr->recv_cq = qp->ibqp.recv_cq;
        init_attr->srq = qp->ibqp.srq;
        init_attr->cap = attr->cap;
        if (qp->s_flags & HFI1_S_SIGNAL_REQ_WR)
                init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
        else
                init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
        init_attr->qp_type = qp->ibqp.qp_type;
        init_attr->port_num = qp->port_num;
        return 0;
}

/**
 * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
 * @qp: the queue pair to compute the AETH for
 *
 * Returns the AETH.
 */
__be32 hfi1_compute_aeth(struct hfi1_qp *qp)
{
        u32 aeth = qp->r_msn & HFI1_MSN_MASK;

        if (qp->ibqp.srq) {
                /*
                 * Shared receive queues don't generate credits.
                 * Set the credit field to the invalid value.
                 */
                aeth |= HFI1_AETH_CREDIT_INVAL << HFI1_AETH_CREDIT_SHIFT;
        } else {
                u32 min, max, x;
                u32 credits;
                struct hfi1_rwq *wq = qp->r_rq.wq;
                u32 head;
                u32 tail;

                /* sanity check pointers before trusting them */
                head = wq->head;
                if (head >= qp->r_rq.size)
                        head = 0;
                tail = wq->tail;
                if (tail >= qp->r_rq.size)
                        tail = 0;
                /*
                 * Compute the number of credits available (RWQEs).
                 * There is a small chance that the pair of reads are
                 * not atomic, which is OK, since the fuzziness is
                 * resolved as further ACKs go out.
                 */
                credits = head - tail;
                if ((int)credits < 0)
                        credits += qp->r_rq.size;
                /*
                 * Binary search the credit table to find the code to
                 * use.
                 */
                min = 0;
                max = 31;
                for (;;) {
                        x = (min + max) / 2;
                        if (credit_table[x] == credits)
                                break;
                        if (credit_table[x] > credits)
                                max = x;
                        else if (min == x)
                                break;
                        else
                                min = x;
                }
                aeth |= x << HFI1_AETH_CREDIT_SHIFT;
        }
        return cpu_to_be32(aeth);
}
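
/*
 * A worked example of the search above: with 100 free RWQEs no table
 * entry matches exactly, so the search converges on the largest code
 * whose value does not exceed the count -- 0xD (96 credits) -- and
 * that code is what gets advertised in the AETH.
 */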

/**
 * hfi1_create_qp - create a queue pair for a device
 * @ibpd: the protection domain whose device we create the queue pair for
 * @init_attr: the attributes of the queue pair
 * @udata: user data for libibverbs.so
 *
 * Returns the queue pair on success, otherwise returns an errno.
 *
 * Called by the ib_create_qp() core verbs function.
 */
struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
                             struct ib_qp_init_attr *init_attr,
                             struct ib_udata *udata)
{
        struct hfi1_qp *qp;
        int err;
        struct hfi1_swqe *swq = NULL;
        struct hfi1_ibdev *dev;
        struct hfi1_devdata *dd;
        size_t sz;
        size_t sg_list_sz;
        struct ib_qp *ret;

        if (init_attr->cap.max_send_sge > hfi1_max_sges ||
            init_attr->cap.max_send_wr > hfi1_max_qp_wrs ||
            init_attr->create_flags) {
                ret = ERR_PTR(-EINVAL);
                goto bail;
        }

        /* Check receive queue parameters if no SRQ is specified. */
        if (!init_attr->srq) {
                if (init_attr->cap.max_recv_sge > hfi1_max_sges ||
                    init_attr->cap.max_recv_wr > hfi1_max_qp_wrs) {
                        ret = ERR_PTR(-EINVAL);
                        goto bail;
                }
                if (init_attr->cap.max_send_sge +
                    init_attr->cap.max_send_wr +
                    init_attr->cap.max_recv_sge +
                    init_attr->cap.max_recv_wr == 0) {
                        ret = ERR_PTR(-EINVAL);
                        goto bail;
                }
        }

        switch (init_attr->qp_type) {
        case IB_QPT_SMI:
        case IB_QPT_GSI:
                if (init_attr->port_num == 0 ||
                    init_attr->port_num > ibpd->device->phys_port_cnt) {
                        ret = ERR_PTR(-EINVAL);
                        goto bail;
                }
                /* fall through */
        case IB_QPT_UC:
        case IB_QPT_RC:
        case IB_QPT_UD:
                sz = sizeof(struct hfi1_sge) *
                        init_attr->cap.max_send_sge +
                        sizeof(struct hfi1_swqe);
                swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
                if (swq == NULL) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail;
                }
                sz = sizeof(*qp);
                sg_list_sz = 0;
                if (init_attr->srq) {
                        struct hfi1_srq *srq = to_isrq(init_attr->srq);

                        if (srq->rq.max_sge > 1)
                                sg_list_sz = sizeof(*qp->r_sg_list) *
                                        (srq->rq.max_sge - 1);
                } else if (init_attr->cap.max_recv_sge > 1)
                        sg_list_sz = sizeof(*qp->r_sg_list) *
                                (init_attr->cap.max_recv_sge - 1);
                qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
                if (!qp) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_swq;
                }
                RCU_INIT_POINTER(qp->next, NULL);
                qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
                if (!qp->s_hdr) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_qp;
                }
                qp->timeout_jiffies =
                        usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
                                1000UL);
                if (init_attr->srq)
                        sz = 0;
                else {
                        qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
                        qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
                        sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
                                sizeof(struct hfi1_rwqe);
                        qp->r_rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) +
                                                   qp->r_rq.size * sz);
                        if (!qp->r_rq.wq) {
                                ret = ERR_PTR(-ENOMEM);
                                goto bail_qp;
                        }
                }

                /*
                 * ib_create_qp() will initialize qp->ibqp
                 * except for qp->ibqp.qp_num.
                 */
                spin_lock_init(&qp->r_lock);
                spin_lock_init(&qp->s_lock);
                spin_lock_init(&qp->r_rq.lock);
                atomic_set(&qp->refcount, 0);
                init_waitqueue_head(&qp->wait);
                init_timer(&qp->s_timer);
                qp->s_timer.data = (unsigned long)qp;
                INIT_LIST_HEAD(&qp->rspwait);
                qp->state = IB_QPS_RESET;
                qp->s_wq = swq;
                qp->s_size = init_attr->cap.max_send_wr + 1;
                qp->s_max_sge = init_attr->cap.max_send_sge;
                if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
                        qp->s_flags = HFI1_S_SIGNAL_REQ_WR;
                dev = to_idev(ibpd->device);
                dd = dd_from_dev(dev);
                err = alloc_qpn(dd, &dev->qp_dev->qpn_table, init_attr->qp_type,
                                init_attr->port_num);
                if (err < 0) {
                        ret = ERR_PTR(err);
                        vfree(qp->r_rq.wq);
                        goto bail_qp;
                }
                qp->ibqp.qp_num = err;
                qp->port_num = init_attr->port_num;
                reset_qp(qp, init_attr->qp_type);

                break;

        default:
                /* Don't support raw QPs */
                ret = ERR_PTR(-ENOSYS);
                goto bail;
        }

        init_attr->cap.max_inline_data = 0;

        /*
         * Return the address of the RWQ as the offset to mmap.
         * See hfi1_mmap() for details.
         */
        if (udata && udata->outlen >= sizeof(__u64)) {
                if (!qp->r_rq.wq) {
                        __u64 offset = 0;

                        err = ib_copy_to_udata(udata, &offset,
                                               sizeof(offset));
                        if (err) {
                                ret = ERR_PTR(err);
                                goto bail_ip;
                        }
                } else {
                        u32 s = sizeof(struct hfi1_rwq) + qp->r_rq.size * sz;

                        qp->ip = hfi1_create_mmap_info(dev, s,
                                                       ibpd->uobject->context,
                                                       qp->r_rq.wq);
                        if (!qp->ip) {
                                ret = ERR_PTR(-ENOMEM);
                                goto bail_ip;
                        }

                        err = ib_copy_to_udata(udata, &(qp->ip->offset),
                                               sizeof(qp->ip->offset));
                        if (err) {
                                ret = ERR_PTR(err);
                                goto bail_ip;
                        }
                }
        }

        spin_lock(&dev->n_qps_lock);
        if (dev->n_qps_allocated == hfi1_max_qps) {
                spin_unlock(&dev->n_qps_lock);
                ret = ERR_PTR(-ENOMEM);
                goto bail_ip;
        }

        dev->n_qps_allocated++;
        spin_unlock(&dev->n_qps_lock);

        if (qp->ip) {
                spin_lock_irq(&dev->pending_lock);
                list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
                spin_unlock_irq(&dev->pending_lock);
        }

        ret = &qp->ibqp;

        /*
         * We have our QP and it's good, now keep track of what types of
         * opcodes can be processed on this QP. We do this by keeping track of
         * what the 3 high order bits of the opcode are.
         */
        switch (init_attr->qp_type) {
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        case IB_QPT_UD:
                qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & OPCODE_QP_MASK;
                break;
        case IB_QPT_RC:
                qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & OPCODE_QP_MASK;
                break;
        case IB_QPT_UC:
                qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & OPCODE_QP_MASK;
                break;
        default:
                ret = ERR_PTR(-EINVAL);
                goto bail_ip;
        }

        goto bail;

bail_ip:
        if (qp->ip)
                kref_put(&qp->ip->ref, hfi1_release_mmap_info);
        else
                vfree(qp->r_rq.wq);
        free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
bail_qp:
        kfree(qp->s_hdr);
        kfree(qp);
bail_swq:
        vfree(swq);
bail:
        return ret;
}

/**
 * hfi1_destroy_qp - destroy a queue pair
 * @ibqp: the queue pair to destroy
 *
 * Returns 0 on success.
 *
 * Note that this can be called while the QP is actively sending or
 * receiving!
 */
int hfi1_destroy_qp(struct ib_qp *ibqp)
{
        struct hfi1_qp *qp = to_iqp(ibqp);
        struct hfi1_ibdev *dev = to_idev(ibqp->device);

        /* Make sure HW and driver activity is stopped. */
        spin_lock_irq(&qp->r_lock);
        spin_lock(&qp->s_lock);
        if (qp->state != IB_QPS_RESET) {
                qp->state = IB_QPS_RESET;
                flush_iowait(qp);
                qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
                spin_unlock(&qp->s_lock);
                spin_unlock_irq(&qp->r_lock);
                cancel_work_sync(&qp->s_iowait.iowork);
                del_timer_sync(&qp->s_timer);
                iowait_sdma_drain(&qp->s_iowait);
                flush_tx_list(qp);
                remove_qp(dev, qp);
                wait_event(qp->wait, !atomic_read(&qp->refcount));
                spin_lock_irq(&qp->r_lock);
                spin_lock(&qp->s_lock);
                clear_mr_refs(qp, 1);
                clear_ahg(qp);
        }
        spin_unlock(&qp->s_lock);
        spin_unlock_irq(&qp->r_lock);

        /* all users cleaned up, mark the QPN available */
        free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
        spin_lock(&dev->n_qps_lock);
        dev->n_qps_allocated--;
        spin_unlock(&dev->n_qps_lock);

        if (qp->ip)
                kref_put(&qp->ip->ref, hfi1_release_mmap_info);
        else
                vfree(qp->r_rq.wq);
        vfree(qp->s_wq);
        kfree(qp->s_hdr);
        kfree(qp);
        return 0;
}

/**
 * init_qpn_table - initialize the QP number table for a device
 * @dd: the device data
 * @qpt: the QPN table
 */
static int init_qpn_table(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt)
{
        u32 offset, qpn, i;
        struct qpn_map *map;
        int ret = 0;

        spin_lock_init(&qpt->lock);

        qpt->last = 0;
        qpt->incr = 1 << dd->qos_shift;

        /* ensure we don't assign QPs from the KDETH 64K window */
        qpn = kdeth_qp << 16;
        qpt->nmaps = qpn / BITS_PER_PAGE;
        /* This should always be zero */
        offset = qpn & BITS_PER_PAGE_MASK;
        map = &qpt->map[qpt->nmaps];
        dd_dev_info(dd, "Reserving QPNs for KDETH window from 0x%x to 0x%x\n",
                qpn, qpn + 65535);
        for (i = 0; i < 65536; i++) {
                if (!map->page) {
                        get_map_page(qpt, map);
                        if (!map->page) {
                                ret = -ENOMEM;
                                break;
                        }
                }
                set_bit(offset, map->page);
                offset++;
                if (offset == BITS_PER_PAGE) {
                        /* next page */
                        qpt->nmaps++;
                        map++;
                        offset = 0;
                }
        }
        return ret;
}
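
/*
 * Example of the reservation above: with the kdeth_qp prefix at 0x80
 * (believed to be the driver default), QPNs 0x800000 through 0x80ffff
 * are marked in-use up front, so verbs QPs can never collide with the
 * KDETH 64K window.
 */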

/**
 * free_qpn_table - free the QP number table for a device
 * @qpt: the QPN table
 */
static void free_qpn_table(struct hfi1_qpn_table *qpt)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
                free_page((unsigned long) qpt->map[i].page);
}

/**
 * hfi1_get_credit - handle the credit information in an AETH
 * @qp: the QP the AETH was received on
 * @aeth: the Acknowledge Extended Transport Header
 *
 * Updates the QP's send credit state and reschedules the send engine
 * if new credits allow it to make progress.
 * The QP s_lock should be held.
 */
void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth)
{
        u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;

        /*
         * If the credit is invalid, we can send
         * as many packets as we like.  Otherwise, we have to
         * honor the credit field.
         */
        if (credit == HFI1_AETH_CREDIT_INVAL) {
                if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
                        qp->s_flags |= HFI1_S_UNLIMITED_CREDIT;
                        if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
                                qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
                                hfi1_schedule_send(qp);
                        }
                }
        } else if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
                /* Compute new LSN (i.e., MSN + credit) */
                credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
                if (cmp_msn(credit, qp->s_lsn) > 0) {
                        qp->s_lsn = credit;
                        if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
                                qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
                                hfi1_schedule_send(qp);
                        }
                }
        }
}
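
/*
 * AETH layout reminder for the decode above: the low 24 bits carry the
 * MSN and bits 24-28 carry the credit code.  For example, an AETH of
 * 0x0d000005 decodes as credit code 0xD (96 RWQEs per credit_table)
 * on top of MSN 5.
 */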

void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag)
{
        unsigned long flags;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (qp->s_flags & flag) {
                qp->s_flags &= ~flag;
                trace_hfi1_qpwakeup(qp, flag);
                hfi1_schedule_send(qp);
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
        /* Notify hfi1_destroy_qp() if it is waiting. */
        if (atomic_dec_and_test(&qp->refcount))
                wake_up(&qp->wait);
}

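/**
 * iowait_sleep - queue a QP when SDMA descriptors run out
 * @sde: the SDMA engine the request was headed for
 * @wait: the QP's iowait structure
 * @stx: the tx request that could not be issued
 * @seq: the engine's progress sequence number at submission time
 *
 * Called back from the SDMA layer.  Parks the tx request and the QP on
 * the engine's dmawait list and returns -EBUSY; returns -EAGAIN if the
 * engine made progress in the meantime and the caller should retry, or
 * 0 if the QP can no longer send (the request is freed).
 */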
static int iowait_sleep(
        struct sdma_engine *sde,
        struct iowait *wait,
        struct sdma_txreq *stx,
        unsigned seq)
{
        struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
        struct hfi1_qp *qp;
        unsigned long flags;
        int ret = 0;
        struct hfi1_ibdev *dev;

        qp = tx->qp;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
                /*
                 * If we couldn't queue the DMA request, save the info
                 * and try again later rather than destroying the
                 * buffer and undoing the side effects of the copy.
                 */
                /* Make a common routine? */
                dev = &sde->dd->verbs_dev;
                list_add_tail(&stx->list, &wait->tx_head);
                write_seqlock(&dev->iowait_lock);
                if (sdma_progress(sde, seq, stx))
                        goto eagain;
                if (list_empty(&qp->s_iowait.list)) {
                        struct hfi1_ibport *ibp =
                                to_iport(qp->ibqp.device, qp->port_num);

                        ibp->n_dmawait++;
                        qp->s_flags |= HFI1_S_WAIT_DMA_DESC;
                        list_add_tail(&qp->s_iowait.list, &sde->dmawait);
                        trace_hfi1_qpsleep(qp, HFI1_S_WAIT_DMA_DESC);
                        atomic_inc(&qp->refcount);
                }
                write_sequnlock(&dev->iowait_lock);
                qp->s_flags &= ~HFI1_S_BUSY;
                spin_unlock_irqrestore(&qp->s_lock, flags);
                ret = -EBUSY;
        } else {
                spin_unlock_irqrestore(&qp->s_lock, flags);
                hfi1_put_txreq(tx);
        }
        return ret;
eagain:
        write_sequnlock(&dev->iowait_lock);
        spin_unlock_irqrestore(&qp->s_lock, flags);
        list_del_init(&stx->list);
        return -EAGAIN;
}

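/*
 * Called by the SDMA engine when descriptors become available;
 * clears HFI1_S_WAIT_DMA_DESC and reschedules the QP through
 * hfi1_qp_wakeup(), which also drops the reference taken in
 * iowait_sleep().
 */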
static void iowait_wakeup(struct iowait *wait, int reason)
{
        struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);

        WARN_ON(reason != SDMA_AVAIL_REASON);
        hfi1_qp_wakeup(qp, HFI1_S_WAIT_DMA_DESC);
}

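/**
 * hfi1_qp_init - allocate the per-device QP hash table and QPN map
 * @dev: the verbs device
 *
 * Return: 0 on success, or a negative errno (-ENOMEM, or the
 * init_qpn_table() error) with any partial allocations released.
 */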
int hfi1_qp_init(struct hfi1_ibdev *dev)
{
        struct hfi1_devdata *dd = dd_from_dev(dev);
        int i;
        int ret = -ENOMEM;

        /* allocate parent object */
        dev->qp_dev = kzalloc(sizeof(*dev->qp_dev), GFP_KERNEL);
        if (!dev->qp_dev)
                goto nomem;
        /* allocate hash table */
        dev->qp_dev->qp_table_size = hfi1_qp_table_size;
        dev->qp_dev->qp_table_bits = ilog2(hfi1_qp_table_size);
        dev->qp_dev->qp_table =
                kmalloc_array(dev->qp_dev->qp_table_size,
                              sizeof(*dev->qp_dev->qp_table),
                              GFP_KERNEL);
        if (!dev->qp_dev->qp_table)
                goto nomem;
        for (i = 0; i < dev->qp_dev->qp_table_size; i++)
                RCU_INIT_POINTER(dev->qp_dev->qp_table[i], NULL);
        spin_lock_init(&dev->qp_dev->qpt_lock);
        /* initialize qpn map */
        ret = init_qpn_table(dd, &dev->qp_dev->qpn_table);
        if (ret)
                goto nomem;
        return ret;
nomem:
        if (dev->qp_dev) {
                kfree(dev->qp_dev->qp_table);
                free_qpn_table(&dev->qp_dev->qpn_table);
                kfree(dev->qp_dev);
        }
        return ret;
}

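/**
 * hfi1_qp_exit - free the per-device QP structures
 * @dev: the verbs device
 *
 * Frees the hash table and QPN map allocated by hfi1_qp_init(),
 * complaining first if any QPs are still in use.
 */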
void hfi1_qp_exit(struct hfi1_ibdev *dev)
{
        struct hfi1_devdata *dd = dd_from_dev(dev);
        u32 qps_inuse;

        qps_inuse = free_all_qps(dd);
        if (qps_inuse)
                dd_dev_err(dd, "QP memory leak! %u still in use\n",
                           qps_inuse);
        if (dev->qp_dev) {
                kfree(dev->qp_dev->qp_table);
                free_qpn_table(&dev->qp_dev->qpn_table);
                kfree(dev->qp_dev);
        }
}

/**
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5)
{
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
        struct sdma_engine *sde;

        if (!(dd->flags & HFI1_HAS_SEND_DMA))
                return NULL;
        switch (qp->ibqp.qp_type) {
        case IB_QPT_UC:
        case IB_QPT_RC:
                break;
        case IB_QPT_SMI:
                return NULL;
        default:
                break;
        }
        sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
        return sde;
}

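/*
 * Illustrative caller (a sketch, not code from this driver): a send
 * path can select an engine with the QP's cached 5 bit sc and fall
 * back to PIO when SDMA is unavailable; post_to_sdma() and
 * post_to_pio() here are hypothetical helpers:
 *
 *        sde = qp_to_sdma_engine(qp, qp->s_sc);
 *        if (sde)
 *                return post_to_sdma(sde, tx);
 *        return post_to_pio(qp, tx);
 */
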
struct qp_iter {
        struct hfi1_ibdev *dev; /* device being iterated */
        struct hfi1_qp *qp;     /* current QP */
        int specials;           /* # of special QPs (QP0/QP1 per port) */
        int n;                  /* current index over specials + buckets */
};

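/**
 * qp_iter_init - allocate an iterator positioned on the first QP
 * @dev: the verbs device
 *
 * Return: an iterator advanced to the first QP, or NULL if there are
 * no QPs or the allocation fails.  The caller must hold the RCU read
 * lock and is responsible for kfree()ing the iterator.
 */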
struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
{
        struct qp_iter *iter;

        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
        if (!iter)
                return NULL;

        iter->dev = dev;
        iter->specials = dev->ibdev.phys_port_cnt * 2;
        if (qp_iter_next(iter)) {
                kfree(iter);
                return NULL;
        }

        return iter;
}

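/*
 * Illustrative use of the iterator (a sketch assuming a seq_file *s;
 * the debugfs QP statistics dump follows this pattern).  Since
 * qp_iter_init() already positions the iterator on the first QP,
 * print before advancing:
 *
 *        rcu_read_lock();
 *        iter = qp_iter_init(dev);
 *        if (iter) {
 *                do {
 *                        qp_iter_print(s, iter);
 *                } while (!qp_iter_next(iter));
 *        }
 *        rcu_read_unlock();
 *        kfree(iter);
 */

/**
 * qp_iter_next - advance the iterator to the next QP
 * @iter: the iterator
 *
 * Walks the special QPs (QP0/QP1 of each port) first, then every hash
 * bucket chain.  Must be called under the RCU read lock.
 *
 * Return: 0 if a next QP was found, 1 when the iteration is exhausted.
 */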
int qp_iter_next(struct qp_iter *iter)
{
        struct hfi1_ibdev *dev = iter->dev;
        int n = iter->n;
        int ret = 1;
        struct hfi1_qp *pqp = iter->qp;
        struct hfi1_qp *qp;

        /*
         * The approach is to consider the special qps
         * as additional table entries before the
         * real hash table.  Since the qp code sets
         * the qp->next hash link to NULL, this works just fine.
         *
         * iter->specials is 2 * # ports
         *
         * n = 0..iter->specials-1 are the special qp indices
         *
         * n = iter->specials..dev->qp_dev->qp_table_size+iter->specials-1
         * are the potential hash bucket entries
         */
        for (; n < dev->qp_dev->qp_table_size + iter->specials; n++) {
                if (pqp) {
                        qp = rcu_dereference(pqp->next);
                } else {
                        if (n < iter->specials) {
                                struct hfi1_pportdata *ppd;
                                struct hfi1_ibport *ibp;
                                int pidx;

                                pidx = n % dev->ibdev.phys_port_cnt;
                                ppd = &dd_from_dev(dev)->pport[pidx];
                                ibp = &ppd->ibport_data;

                                if (!(n & 1))
                                        qp = rcu_dereference(ibp->qp[0]);
                                else
                                        qp = rcu_dereference(ibp->qp[1]);
                        } else {
                                qp = rcu_dereference(
                                        dev->qp_dev->qp_table[
                                                (n - iter->specials)]);
                        }
                }
                pqp = qp;
                if (qp) {
                        iter->qp = qp;
                        iter->n = n;
                        return 0;
                }
        }
        return ret;
}

static const char * const qp_type_str[] = {
        "SMI", "GSI", "RC", "UC", "UD",
};

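/*
 * A QP is idle when all of its send work queue indices coincide:
 * nothing is queued, in flight, or waiting to be acknowledged.
 */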
static int qp_idle(struct hfi1_qp *qp)
{
        return
                qp->s_last == qp->s_acked &&
                qp->s_acked == qp->s_cur &&
                qp->s_cur == qp->s_tail &&
                qp->s_tail == qp->s_head;
}

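/**
 * qp_iter_print - dump one line of QP state to a seq_file
 * @s: the seq_file
 * @iter: the iterator positioned on the QP to print
 *
 * Emits the QP number, state, flags, PSNs, work queue indices, and
 * SDMA engine selection for consumption through debugfs.
 */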
void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
{
        struct hfi1_swqe *wqe;
        struct hfi1_qp *qp = iter->qp;
        struct sdma_engine *sde;

        sde = qp_to_sdma_engine(qp, qp->s_sc);
        wqe = get_swqe_ptr(qp, qp->s_last);
        seq_printf(s,
                   "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %d %u %u %u SDE %p,%u\n",
                   iter->n,
                   qp_idle(qp) ? "I" : "B",
                   qp->ibqp.qp_num,
                   atomic_read(&qp->refcount),
                   qp_type_str[qp->ibqp.qp_type],
                   qp->state,
                   wqe ? wqe->wr.opcode : 0,
                   qp->s_hdrwords,
                   qp->s_flags,
                   atomic_read(&qp->s_iowait.sdma_busy),
                   !list_empty(&qp->s_iowait.list),
                   qp->timeout,
                   wqe ? wqe->ssn : 0,
                   qp->s_lsn,
                   qp->s_last_psn,
                   qp->s_psn, qp->s_next_psn,
                   qp->s_sending_psn, qp->s_sending_hpsn,
                   qp->s_last, qp->s_acked, qp->s_cur,
                   qp->s_tail, qp->s_head, qp->s_size,
                   qp->remote_qpn,
                   qp->remote_ah_attr.dlid,
                   qp->remote_ah_attr.sl,
                   qp->pmtu,
                   qp->s_retry_cnt,
                   qp->timeout,
                   qp->s_rnr_retry_cnt,
                   sde,
                   sde ? sde->this_idx : 0);
}

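/**
 * qp_comm_est - note that communication is established on a QP
 * @qp: the QP
 *
 * Sets HFI1_R_COMM_EST and, if the consumer registered an event
 * handler, raises IB_EVENT_COMM_EST on the QP.
 */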
void qp_comm_est(struct hfi1_qp *qp)
{
        qp->r_flags |= HFI1_R_COMM_EST;
        if (qp->ibqp.event_handler) {
                struct ib_event ev;

                ev.device = qp->ibqp.device;
                ev.element.qp = &qp->ibqp;
                ev.event = IB_EVENT_COMM_EST;
                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
        }
}
