linux/drivers/infiniband/core/iwcm.c
/*
 * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/completion.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>

#include "iwcm.h"

MODULE_AUTHOR("Tom Tucker");
MODULE_DESCRIPTION("iWARP CM");
MODULE_LICENSE("Dual BSD/GPL");

static struct workqueue_struct *iwcm_wq;
struct iwcm_work {
	struct work_struct work;
	struct iwcm_id_private *cm_id;
	struct list_head list;
	struct iw_cm_event event;
	struct list_head free_list;
};

/*
 * The following services provide a mechanism for pre-allocating iwcm_work
 * elements.  The design pre-allocates them based on the cm_id type:
 *      LISTENING IDS:  Get enough elements preallocated to handle the
 *                      listen backlog.
 *      ACTIVE IDS:     4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
 *      PASSIVE IDS:    3: ESTABLISHED, DISCONNECT, CLOSE
 *
 * Allocating them in connect and listen avoids having to deal
 * with allocation failures on the event upcall from the provider (which
 * is called in interrupt context).
 *
 * One exception is when creating the cm_id for incoming connection requests.
 * There are two cases:
 * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
 *    the backlog is exceeded, then no more connection request events will
 *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's
 *    up to the provider to reject the connection request.
 * 2) in the connection request workqueue handler, cm_conn_req_handler().
 *    If work elements cannot be allocated for the new connect request cm_id,
 *    then the IWCM will call the provider's reject method.  This is OK since
 *    cm_conn_req_handler() runs in the workqueue thread context.
 */

static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
{
	struct iwcm_work *work;

	if (list_empty(&cm_id_priv->work_free_list))
		return NULL;
	work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
			  free_list);
	list_del_init(&work->free_list);
	return work;
}

static void put_work(struct iwcm_work *work)
{
	list_add(&work->free_list, &work->cm_id->work_free_list);
}

static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
{
	struct list_head *e, *tmp;

	list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
		kfree(list_entry(e, struct iwcm_work, free_list));
}

static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
{
	struct iwcm_work *work;

	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
	while (count--) {
		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
		if (!work) {
			dealloc_work_entries(cm_id_priv);
			return -ENOMEM;
		}
		work->cm_id = cm_id_priv;
		INIT_LIST_HEAD(&work->list);
		put_work(work);
	}
	return 0;
}

/*
 * Save private data from incoming connection requests to
 * iw_cm_event, so the low level driver doesn't have to. Adjust
 * the event ptr to point to the local copy.
 */
static int copy_private_data(struct iw_cm_event *event)
{
	void *p;

	p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
	if (!p)
		return -ENOMEM;
	event->private_data = p;
	return 0;
}

static void free_cm_id(struct iwcm_id_private *cm_id_priv)
{
	dealloc_work_entries(cm_id_priv);
	kfree(cm_id_priv);
}

/*
 * Release a reference on cm_id.  If the last reference is being
 * released, wake up the thread waiting in iw_destroy_cm_id (via
 * destroy_comp) and return 1; otherwise return 0.
 */
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
	BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
		BUG_ON(!list_empty(&cm_id_priv->work_list));
		complete(&cm_id_priv->destroy_comp);
		return 1;
	}

	return 0;
}

static void add_ref(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;
	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	atomic_inc(&cm_id_priv->refcount);
}

static void rem_ref(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;
	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	if (iwcm_deref_id(cm_id_priv) &&
	    test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
		BUG_ON(!list_empty(&cm_id_priv->work_list));
		free_cm_id(cm_id_priv);
	}
}

static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);

struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
				 iw_cm_handler cm_handler,
				 void *context)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
	if (!cm_id_priv)
		return ERR_PTR(-ENOMEM);

	cm_id_priv->state = IW_CM_STATE_IDLE;
	cm_id_priv->id.device = device;
	cm_id_priv->id.cm_handler = cm_handler;
	cm_id_priv->id.context = context;
	cm_id_priv->id.event_handler = cm_event_handler;
	cm_id_priv->id.add_ref = add_ref;
	cm_id_priv->id.rem_ref = rem_ref;
	spin_lock_init(&cm_id_priv->lock);
	atomic_set(&cm_id_priv->refcount, 1);
	init_waitqueue_head(&cm_id_priv->connect_wait);
	init_completion(&cm_id_priv->destroy_comp);
	INIT_LIST_HEAD(&cm_id_priv->work_list);
	INIT_LIST_HEAD(&cm_id_priv->work_free_list);

	return &cm_id_priv->id;
}
EXPORT_SYMBOL(iw_create_cm_id);
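
/*
 * Illustrative sketch only, not part of the original file and not
 * compiled with it: how a kernel consumer might create and later tear
 * down a cm_id.  The handler and wrapper names below are hypothetical.
 */
#if 0
static int example_cm_handler(struct iw_cm_id *cm_id,
			      struct iw_cm_event *event)
{
	/* Returning non-zero asks the IWCM to destroy this cm_id. */
	return 0;
}

static struct iw_cm_id *example_create_id(struct ib_device *device, void *ctx)
{
	struct iw_cm_id *cm_id;

	/* Only -ENOMEM can be returned here, as an ERR_PTR. */
	cm_id = iw_create_cm_id(device, example_cm_handler, ctx);
	if (IS_ERR(cm_id))
		return NULL;
	return cm_id;
}
#endif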


static int iwcm_modify_qp_err(struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;

	if (!qp)
		return -EINVAL;

	qp_attr.qp_state = IB_QPS_ERR;
	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * This is really the RDMAC CLOSING state. It is most similar to the
 * IB SQD QP state.
 */
static int iwcm_modify_qp_sqd(struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;

	BUG_ON(qp == NULL);
	qp_attr.qp_state = IB_QPS_SQD;
	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * CM_ID <-- CLOSING
 *
 * Block if a passive or active connection is currently being processed. Then
 * process the event as follows:
 * - If we are ESTABLISHED, move to CLOSING and modify the QP state
 *   based on the abrupt flag
 * - If the connection is already in the CLOSING or IDLE state, the peer is
 *   disconnecting concurrently with us and we've already seen the
 *   DISCONNECT event -- ignore the request and return 0
 * - Disconnect on a listening endpoint returns -EINVAL
 */
int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret = 0;
	struct ib_qp *qp = NULL;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	/* Wait if we're currently in a connect or accept downcall */
	wait_event(cm_id_priv->connect_wait,
		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_ESTABLISHED:
		cm_id_priv->state = IW_CM_STATE_CLOSING;

		/* QP could be NULL for a user-mode client */
		if (cm_id_priv->qp)
			qp = cm_id_priv->qp;
		else
			ret = -EINVAL;
		break;
	case IW_CM_STATE_LISTEN:
		ret = -EINVAL;
		break;
	case IW_CM_STATE_CLOSING:
		/* remote peer closed first */
	case IW_CM_STATE_IDLE:
		/* accept or connect returned !0 */
		break;
	case IW_CM_STATE_CONN_RECV:
		/*
		 * App called disconnect before/without calling accept after
		 * connect_request event delivered.
		 */
		break;
	case IW_CM_STATE_CONN_SENT:
		/* Can only get here if wait above fails */
	default:
		BUG();
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	if (qp) {
		if (abrupt)
			ret = iwcm_modify_qp_err(qp);
		else
			ret = iwcm_modify_qp_sqd(qp);

		/*
		 * If both sides are disconnecting the QP could
		 * already be in ERR or SQD states
		 */
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL(iw_cm_disconnect);
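
/*
 * Illustrative sketch only, not compiled with this file: a consumer
 * tearing down an established connection.  A graceful close (abrupt == 0)
 * moves the QP toward SQD/CLOSING, an abrupt close moves it to ERR; the
 * helper name is hypothetical.
 */
#if 0
static void example_close(struct iw_cm_id *cm_id, int abrupt)
{
	/* -EINVAL means the cm_id was not connected (e.g. a listener). */
	(void) iw_cm_disconnect(cm_id, abrupt);

	/* Waits until every event posted for this cm_id has been handled. */
	iw_destroy_cm_id(cm_id);
}
#endif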

/*
 * CM_ID <-- DESTROYING
 *
 * Clean up all resources associated with the connection and release
 * the initial reference taken by iw_create_cm_id.
 */
static void destroy_cm_id(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	/*
	 * Wait if we're currently in a connect or accept downcall. A
	 * listening endpoint should never block here.
	 */
	wait_event(cm_id_priv->connect_wait,
		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_LISTEN:
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		/* destroy the listening endpoint */
		ret = cm_id->device->iwcm->destroy_listen(cm_id);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_ESTABLISHED:
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		/* Abrupt close of the connection */
		(void)iwcm_modify_qp_err(cm_id_priv->qp);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CLOSING:
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		break;
	case IW_CM_STATE_CONN_RECV:
		/*
		 * App called destroy before/without calling accept after
		 * receiving connection request event notification or
		 * returned non zero from the event callback function.
		 * In either case, must tell the provider to reject.
		 */
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		cm_id->device->iwcm->reject(cm_id, NULL, 0);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_DESTROYING:
	default:
		BUG();
		break;
	}
	if (cm_id_priv->qp) {
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	(void)iwcm_deref_id(cm_id_priv);
}

/*
 * This function is only called by the application thread and cannot
 * be called by the event thread. The function will wait for all
 * references to be released on the cm_id and then kfree the cm_id
 * object.
 */
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));

	destroy_cm_id(cm_id);

	wait_for_completion(&cm_id_priv->destroy_comp);

	free_cm_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);

/*
 * CM_ID <-- LISTEN
 *
 * Start listening for connect requests. Generates one CONNECT_REQUEST
 * event for each inbound connect request.
 */
int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	ret = alloc_work_entries(cm_id_priv, backlog);
	if (ret)
		return ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
		cm_id_priv->state = IW_CM_STATE_LISTEN;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
		if (ret)
			cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	default:
		ret = -EINVAL;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	return ret;
}
EXPORT_SYMBOL(iw_cm_listen);
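
/*
 * Illustrative sketch only, not compiled with this file: bringing up a
 * passive (listening) endpoint.  The port and backlog values are
 * hypothetical, and the sockaddr_in address fields are assumed to match
 * this kernel's struct iw_cm_id layout.
 */
#if 0
static struct iw_cm_id *example_listen(struct ib_device *device,
				       iw_cm_handler handler, void *ctx,
				       __be16 port)
{
	struct iw_cm_id *cm_id;
	int ret;

	cm_id = iw_create_cm_id(device, handler, ctx);
	if (IS_ERR(cm_id))
		return cm_id;

	cm_id->local_addr.sin_family = AF_INET;
	cm_id->local_addr.sin_addr.s_addr = htonl(INADDR_ANY);
	cm_id->local_addr.sin_port = port;

	/* The backlog also sizes the pre-allocated work element pool. */
	ret = iw_cm_listen(cm_id, 8);
	if (ret) {
		iw_destroy_cm_id(cm_id);
		return ERR_PTR(ret);
	}
	return cm_id;
}
#endif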

/*
 * CM_ID <-- IDLE
 *
 * Rejects an inbound connection request. No events are generated.
 */
int iw_cm_reject(struct iw_cm_id *cm_id,
		 const void *private_data,
		 u8 private_data_len)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret;
	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	cm_id_priv->state = IW_CM_STATE_IDLE;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = cm_id->device->iwcm->reject(cm_id, private_data,
					  private_data_len);

	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}
EXPORT_SYMBOL(iw_cm_reject);

/*
 * CM_ID <-- ESTABLISHED
 *
 * Accepts an inbound connection request and generates an ESTABLISHED
 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
 * until the ESTABLISHED event is received from the provider.
 */
int iw_cm_accept(struct iw_cm_id *cm_id,
		 struct iw_cm_conn_param *iw_param)
{
	struct iwcm_id_private *cm_id_priv;
	struct ib_qp *qp;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	/* Get the ib_qp given the QPN */
	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
	if (!qp) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		return -EINVAL;
	}
	cm_id->device->iwcm->add_ref(qp);
	cm_id_priv->qp = qp;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
	if (ret) {
		/* An error on accept precludes provider events */
		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
		cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		if (cm_id_priv->qp) {
			cm_id->device->iwcm->rem_ref(qp);
			cm_id_priv->qp = NULL;
		}
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
	}

	return ret;
}
EXPORT_SYMBOL(iw_cm_accept);
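
/*
 * Illustrative sketch only, not compiled with this file: what a client
 * callback might do with the new cm_id passed in an
 * IW_CM_EVENT_CONNECT_REQUEST event.  The QP number and the ORD/IRD
 * values are hypothetical.
 */
#if 0
static int example_conn_request(struct iw_cm_id *new_cm_id, u32 qpn)
{
	struct iw_cm_conn_param param = {
		.qpn = qpn,
		.ord = 1,
		.ird = 1,
		.private_data = NULL,
		.private_data_len = 0,
	};

	/* No QP to back the connection: reject, no further events follow. */
	if (!qpn)
		return iw_cm_reject(new_cm_id, NULL, 0);

	/* On success an ESTABLISHED event is delivered to the handler. */
	return iw_cm_accept(new_cm_id, &param);
}
#endif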

/*
 * Active Side: CM_ID <-- CONN_SENT
 *
 * If successful, results in the generation of a CONNECT_REPLY
 * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
 * CONNECT_REPLY event is received from the provider.
 */
int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
	struct iwcm_id_private *cm_id_priv;
	int ret;
	unsigned long flags;
	struct ib_qp *qp;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	ret = alloc_work_entries(cm_id_priv, 4);
	if (ret)
		return ret;

	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	spin_lock_irqsave(&cm_id_priv->lock, flags);

	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}

	/* Get the ib_qp given the QPN */
	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
	if (!qp) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		return -EINVAL;
	}
	cm_id->device->iwcm->add_ref(qp);
	cm_id_priv->qp = qp;
	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = cm_id->device->iwcm->connect(cm_id, iw_param);
	if (ret) {
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		if (cm_id_priv->qp) {
			cm_id->device->iwcm->rem_ref(qp);
			cm_id_priv->qp = NULL;
		}
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
		cm_id_priv->state = IW_CM_STATE_IDLE;
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
	}

	return ret;
}
EXPORT_SYMBOL(iw_cm_connect);
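
/*
 * Illustrative sketch only, not compiled with this file: an active-side
 * connect.  The QP number and private data are hypothetical; the outcome
 * is reported later through an IW_CM_EVENT_CONNECT_REPLY event.
 */
#if 0
static int example_connect(struct iw_cm_id *cm_id, u32 qpn,
			   const void *pdata, u8 pdata_len)
{
	struct iw_cm_conn_param param = {
		.qpn = qpn,
		.ord = 1,
		.ird = 1,
		.private_data = pdata,
		.private_data_len = pdata_len,
	};

	/*
	 * cm_id->local_addr and cm_id->remote_addr must already be set.
	 * iw_cm_disconnect() and iw_destroy_cm_id() will block until the
	 * CONNECT_REPLY event has been delivered.
	 */
	return iw_cm_connect(cm_id, &param);
}
#endif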

/*
 * Passive Side: new CM_ID <-- CONN_RECV
 *
 * Handles an inbound connect request. The function creates a new
 * iw_cm_id to represent the new connection and inherits the client
 * callback function and other attributes from the listening parent.
 *
 * The work item contains a pointer to the listen_cm_id and the event. The
 * listen_cm_id contains the client cm_handler, context and
 * device. These are copied when the new cm_id is created. The event
 * contains the new four-tuple.
 *
 * An error on the child should not affect the parent, so this
 * function does not return a value.
 */
static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
				struct iw_cm_event *iw_event)
{
	unsigned long flags;
	struct iw_cm_id *cm_id;
	struct iwcm_id_private *cm_id_priv;
	int ret;

	/*
	 * The provider should never generate a connection request
	 * event with a bad status.
	 */
	BUG_ON(iw_event->status);

	/*
	 * We could be destroying the listening id. If so, ignore this
	 * upcall.
	 */
	spin_lock_irqsave(&listen_id_priv->lock, flags);
	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
		goto out;
	}
	spin_unlock_irqrestore(&listen_id_priv->lock, flags);

	cm_id = iw_create_cm_id(listen_id_priv->id.device,
				listen_id_priv->id.cm_handler,
				listen_id_priv->id.context);
	/* If the cm_id could not be created, ignore the request */
	if (IS_ERR(cm_id))
		goto out;

	cm_id->provider_data = iw_event->provider_data;
	cm_id->local_addr = iw_event->local_addr;
	cm_id->remote_addr = iw_event->remote_addr;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	cm_id_priv->state = IW_CM_STATE_CONN_RECV;

	ret = alloc_work_entries(cm_id_priv, 3);
	if (ret) {
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
		goto out;
	}

	/* Call the client CM handler */
	ret = cm_id->cm_handler(cm_id, iw_event);
	if (ret) {
		iw_cm_reject(cm_id, NULL, 0);
		set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
		destroy_cm_id(cm_id);
		if (atomic_read(&cm_id_priv->refcount) == 0)
			free_cm_id(cm_id_priv);
	}

out:
	if (iw_event->private_data_len)
		kfree(iw_event->private_data);
}

/*
 * Passive Side: CM_ID <-- ESTABLISHED
 *
 * The provider generated an ESTABLISHED event which means that
 * the MPA negotiation has completed successfully and we are now in MPA
 * FPDU mode.
 *
 * This event can only be received in the CONN_RECV state. If the
 * remote peer closed, the ESTABLISHED event would be received followed
 * by the CLOSE event. If the app closes, it will block until we wake
 * it up after processing this event.
 */
static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
			       struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);

	/*
	 * We clear the CONNECT_WAIT bit here to allow the callback
	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
	 * from a callback handler is not allowed.
	 */
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}

/*
 * Active Side: CM_ID <-- ESTABLISHED
 *
 * The app has called connect and is waiting for the connection to be
 * established so it can post its requests to the server. This event will
 * wake up anyone blocked in iw_cm_disconnect or iw_destroy_cm_id.
 */
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
			       struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	/*
	 * Clear the connect wait bit so a callback function calling
	 * iw_cm_disconnect will not wait and deadlock this thread
	 */
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
	if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
		cm_id_priv->id.local_addr = iw_event->local_addr;
		cm_id_priv->id.remote_addr = iw_event->remote_addr;
		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
	} else {
		/* REJECTED or RESET */
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
		cm_id_priv->state = IW_CM_STATE_IDLE;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);

	if (iw_event->private_data_len)
		kfree(iw_event->private_data);

	/* Wake up waiters on connect complete */
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}

/*
 * CM_ID <-- CLOSING
 *
 * If in the ESTABLISHED state, move to CLOSING.
 */
static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
				  struct iw_cm_event *iw_event)
{
	unsigned long flags;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
		cm_id_priv->state = IW_CM_STATE_CLOSING;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * CM_ID <-- IDLE
 *
 * If in the ESTABLISHED or CLOSING states, the QP will have been
 * moved by the provider to the ERR state. Disassociate the CM_ID from
 * the QP, move to IDLE, and remove the 'connected' reference.
 *
 * If in some other state, the cm_id was destroyed asynchronously.
 * This is the last reference that will result in waking up
 * the app thread blocked in iw_destroy_cm_id.
 */
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
			    struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret = 0;
	spin_lock_irqsave(&cm_id_priv->lock, flags);

	if (cm_id_priv->qp) {
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
	}
	switch (cm_id_priv->state) {
	case IW_CM_STATE_ESTABLISHED:
	case IW_CM_STATE_CLOSING:
		cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_DESTROYING:
		break;
	default:
		BUG();
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	return ret;
}

static int process_event(struct iwcm_id_private *cm_id_priv,
			 struct iw_cm_event *iw_event)
{
	int ret = 0;

	switch (iw_event->event) {
	case IW_CM_EVENT_CONNECT_REQUEST:
		cm_conn_req_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_CONNECT_REPLY:
		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_ESTABLISHED:
		ret = cm_conn_est_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_DISCONNECT:
		cm_disconnect_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_CLOSE:
		ret = cm_close_handler(cm_id_priv, iw_event);
		break;
	default:
		BUG();
	}

	return ret;
}

/*
 * Process events on the work_list for the cm_id. If the callback
 * function requests that the cm_id be deleted, a flag is set in the
 * cm_id flags to indicate that when the last reference is
 * removed, the cm_id is to be destroyed. This is necessary to
 * distinguish between an object that will be destroyed by the app
 * thread asleep on the destroy_comp list vs. an object destroyed
 * here synchronously when the last reference is removed.
 */
static void cm_work_handler(struct work_struct *_work)
{
	struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
	struct iw_cm_event levent;
	struct iwcm_id_private *cm_id_priv = work->cm_id;
	unsigned long flags;
	int empty;
	int ret = 0;
	int destroy_id;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	empty = list_empty(&cm_id_priv->work_list);
	while (!empty) {
		work = list_entry(cm_id_priv->work_list.next,
				  struct iwcm_work, list);
		list_del_init(&work->list);
		empty = list_empty(&cm_id_priv->work_list);
		levent = work->event;
		put_work(work);
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);

		ret = process_event(cm_id_priv, &levent);
		if (ret) {
			set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
			destroy_cm_id(&cm_id_priv->id);
		}
		BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
		destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
		if (iwcm_deref_id(cm_id_priv)) {
			if (destroy_id) {
				BUG_ON(!list_empty(&cm_id_priv->work_list));
				free_cm_id(cm_id_priv);
			}
			return;
		}
		spin_lock_irqsave(&cm_id_priv->lock, flags);
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}
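
/*
 * Illustrative sketch only, not compiled with this file: because
 * iw_destroy_cm_id() must not be called from the event callback, a client
 * that wants the cm_id gone simply returns non-zero from its handler and
 * lets cm_work_handler() destroy it.  The handler name is hypothetical.
 */
#if 0
static int example_client_handler(struct iw_cm_id *cm_id,
				  struct iw_cm_event *event)
{
	if (event->event == IW_CM_EVENT_CONNECT_REPLY &&
	    event->status != IW_CM_EVENT_STATUS_ACCEPTED)
		return -ECONNREFUSED;	/* IWCM will destroy this cm_id */

	return 0;
}
#endif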

/*
 * This function is called in interrupt context. Schedule events on
 * the iwcm_wq thread to allow callback functions to downcall into
 * the CM and/or block.  Events are queued to a per-CM_ID
 * work_list. If this is the first event on the work_list, the work
 * element is also queued on the iwcm_wq thread.
 *
 * Each event holds a reference on the cm_id. Until the last posted
 * event has been delivered and processed, the cm_id cannot be
 * deleted.
 *
 * Returns:
 *            0 - the event was handled.
 *      -ENOMEM - the event was not handled due to lack of resources.
 */
static int cm_event_handler(struct iw_cm_id *cm_id,
			    struct iw_cm_event *iw_event)
{
	struct iwcm_work *work;
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret = 0;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	work = get_work(cm_id_priv);
	if (!work) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_WORK(&work->work, cm_work_handler);
	work->cm_id = cm_id_priv;
	work->event = *iw_event;

	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
	    work->event.private_data_len) {
		ret = copy_private_data(&work->event);
		if (ret) {
			put_work(work);
			goto out;
		}
	}

	atomic_inc(&cm_id_priv->refcount);
	if (list_empty(&cm_id_priv->work_list)) {
		list_add_tail(&work->list, &cm_id_priv->work_list);
		queue_work(iwcm_wq, &work->work);
	} else
		list_add_tail(&work->list, &cm_id_priv->work_list);
out:
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}
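
/*
 * Illustrative sketch only, not compiled with this file: how a provider
 * driver might deliver an event from its interrupt path.  Only the
 * cm_id->event_handler upcall is part of this API; the surrounding names
 * are hypothetical.
 */
#if 0
static void example_provider_established(struct iw_cm_id *cm_id)
{
	struct iw_cm_event event;

	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_ESTABLISHED;

	/*
	 * -ENOMEM means no pre-allocated work element was left; for
	 * CONNECT_REQUEST events the provider must then reject itself.
	 */
	if (cm_id->event_handler(cm_id, &event))
		pr_debug("iw_cm event dropped\n");
}
#endif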

static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
				  struct ib_qp_attr *qp_attr,
				  int *qp_attr_mask)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_CONN_RECV:
	case IW_CM_STATE_ESTABLISHED:
		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE |
					   IB_ACCESS_REMOTE_READ;
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}

static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
				 struct ib_qp_attr *qp_attr,
				 int *qp_attr_mask)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_CONN_RECV:
	case IW_CM_STATE_ESTABLISHED:
		*qp_attr_mask = 0;
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}

int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
		       struct ib_qp_attr *qp_attr,
		       int *qp_attr_mask)
{
	struct iwcm_id_private *cm_id_priv;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	switch (qp_attr->qp_state) {
	case IB_QPS_INIT:
	case IB_QPS_RTR:
		ret = iwcm_init_qp_init_attr(cm_id_priv,
					     qp_attr, qp_attr_mask);
		break;
	case IB_QPS_RTS:
		ret = iwcm_init_qp_rts_attr(cm_id_priv,
					    qp_attr, qp_attr_mask);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret;
}
EXPORT_SYMBOL(iw_cm_init_qp_attr);
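
/*
 * Illustrative sketch only, not compiled with this file: using
 * iw_cm_init_qp_attr() to fill the attributes for an ib_modify_qp()
 * transition.  qp_attr->qp_state selects which attribute set is built.
 */
#if 0
static int example_qp_to_rts(struct iw_cm_id *cm_id, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask;
	int ret;

	qp_attr.qp_state = IB_QPS_RTS;
	ret = iw_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}
#endif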

static int __init iw_cm_init(void)
{
	iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
	if (!iwcm_wq)
		return -ENOMEM;

	return 0;
}

static void __exit iw_cm_cleanup(void)
{
	destroy_workqueue(iwcm_wq);
}

module_init(iw_cm_init);
module_exit(iw_cm_cleanup);