linux/drivers/infiniband/ulp/iser/iser_verbs.c
/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include "iscsi_iser.h"

#define ISCSI_ISER_MAX_CONN     8
#define ISER_MAX_RX_CQ_LEN      (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_CQ_LEN      (ISER_QP_MAX_REQ_DTOS  * ISCSI_ISER_MAX_CONN)

static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);

static void iser_cq_event_callback(struct ib_event *cause, void *context)
{
        iser_err("got cq event %d\n", cause->event);
}

static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
        iser_err("got qp event %d\n", cause->event);
}

static void iser_event_handler(struct ib_event_handler *handler,
                                struct ib_event *event)
{
        iser_err("async event %d on device %s port %d\n", event->event,
                event->device->name, event->element.port_num);
}

/**
 * iser_create_device_ib_res - creates the Protection Domain (PD), Completion
 * Queues (CQs) and DMA Memory Region (DMA MR) with the IB device associated
 * with the adapter.
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
        device->pd = ib_alloc_pd(device->ib_device);
        if (IS_ERR(device->pd))
                goto pd_err;

        device->rx_cq = ib_create_cq(device->ib_device,
                                  iser_cq_callback,
                                  iser_cq_event_callback,
                                  (void *)device,
                                  ISER_MAX_RX_CQ_LEN, 0);
        if (IS_ERR(device->rx_cq))
                goto rx_cq_err;

        device->tx_cq = ib_create_cq(device->ib_device,
                                  NULL, iser_cq_event_callback,
                                  (void *)device,
                                  ISER_MAX_TX_CQ_LEN, 0);

        if (IS_ERR(device->tx_cq))
                goto tx_cq_err;

        if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
                goto cq_arm_err;

        tasklet_init(&device->cq_tasklet,
                     iser_cq_tasklet_fn,
                     (unsigned long)device);

        device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
                                   IB_ACCESS_REMOTE_WRITE |
                                   IB_ACCESS_REMOTE_READ);
        if (IS_ERR(device->mr))
                goto dma_mr_err;

        INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
                                iser_event_handler);
        if (ib_register_event_handler(&device->event_handler))
                goto handler_err;

        return 0;

handler_err:
        ib_dereg_mr(device->mr);
dma_mr_err:
        tasklet_kill(&device->cq_tasklet);
cq_arm_err:
        ib_destroy_cq(device->tx_cq);
tx_cq_err:
        ib_destroy_cq(device->rx_cq);
rx_cq_err:
        ib_dealloc_pd(device->pd);
pd_err:
        iser_err("failed to allocate an IB resource\n");
        return -1;
}

/**
 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 * CQs and PD created with the device associated with the adapter.
 */
static void iser_free_device_ib_res(struct iser_device *device)
{
        BUG_ON(device->mr == NULL);

        tasklet_kill(&device->cq_tasklet);
        (void)ib_unregister_event_handler(&device->event_handler);
        (void)ib_dereg_mr(device->mr);
        (void)ib_destroy_cq(device->tx_cq);
        (void)ib_destroy_cq(device->rx_cq);
        (void)ib_dealloc_pd(device->pd);

        device->mr = NULL;
        device->tx_cq = NULL;
        device->rx_cq = NULL;
        device->pd = NULL;
}

/**
 * iser_create_ib_conn_res - creates the FMR pool and Queue-Pair (QP)
 *
 * returns 0 on success, errno code on failure
 */
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
        struct iser_device      *device;
        struct ib_qp_init_attr  init_attr;
        int                     req_err, resp_err, ret = -ENOMEM;
        struct ib_fmr_pool_param params;

        BUG_ON(ib_conn->device == NULL);

        device = ib_conn->device;

        ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
                                        ISER_RX_LOGIN_SIZE, GFP_KERNEL);
        if (!ib_conn->login_buf)
                goto out_err;

        ib_conn->login_req_buf  = ib_conn->login_buf;
        ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;

        ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
                                (void *)ib_conn->login_req_buf,
                                ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

        ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
                                (void *)ib_conn->login_resp_buf,
                                ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

        req_err  = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
        resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);

        if (req_err || resp_err) {
                if (req_err)
                        ib_conn->login_req_dma = 0;
                if (resp_err)
                        ib_conn->login_resp_dma = 0;
                goto out_err;
        }

        ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
                                    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE + 1)),
                                    GFP_KERNEL);
        if (!ib_conn->page_vec)
                goto out_err;

        ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);

        params.page_shift        = SHIFT_4K;
        /* when the first/last SG element is not start/end *
         * page aligned, the map would be of N+1 pages     */
        params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
        /* make the pool size twice the max number of SCSI commands *
         * the ML is expected to queue, watermark for unmap at 50%  */
        params.pool_size         = ISCSI_DEF_XMIT_CMDS_MAX * 2;
        params.dirty_watermark   = ISCSI_DEF_XMIT_CMDS_MAX;
        params.cache             = 0;
        params.flush_function    = NULL;
        params.access            = (IB_ACCESS_LOCAL_WRITE  |
                                    IB_ACCESS_REMOTE_WRITE |
                                    IB_ACCESS_REMOTE_READ);

        ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
        if (IS_ERR(ib_conn->fmr_pool)) {
                ret = PTR_ERR(ib_conn->fmr_pool);
                ib_conn->fmr_pool = NULL;
                goto out_err;
        }

        memset(&init_attr, 0, sizeof init_attr);

        init_attr.event_handler = iser_qp_event_callback;
        init_attr.qp_context    = (void *)ib_conn;
        init_attr.send_cq       = device->tx_cq;
        init_attr.recv_cq       = device->rx_cq;
        init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
        init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
        init_attr.cap.max_send_sge = 2;
        init_attr.cap.max_recv_sge = 1;
        init_attr.sq_sig_type   = IB_SIGNAL_REQ_WR;
        init_attr.qp_type       = IB_QPT_RC;

        ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
        if (ret)
                goto out_err;

        ib_conn->qp = ib_conn->cma_id->qp;
        iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
                 ib_conn, ib_conn->cma_id,
                 ib_conn->fmr_pool, ib_conn->cma_id->qp);
        return ret;

out_err:
        iser_err("unable to alloc mem or create resource, err %d\n", ret);
        return ret;
}

/**
 * releases the FMR pool, QP and CMA ID objects; returns 0 on success
 */
static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
{
        BUG_ON(ib_conn == NULL);

        iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
                 ib_conn, ib_conn->cma_id,
                 ib_conn->fmr_pool, ib_conn->qp);

        /* qp is created only once both addr & route are resolved */
        if (ib_conn->fmr_pool != NULL)
                ib_destroy_fmr_pool(ib_conn->fmr_pool);

        if (ib_conn->qp != NULL)
                rdma_destroy_qp(ib_conn->cma_id);

        /* when called from the cma handler context, the caller arranges *
         * for the cma itself to destroy the id                          */
        if (ib_conn->cma_id != NULL && can_destroy_id)
                rdma_destroy_id(ib_conn->cma_id);

        ib_conn->fmr_pool = NULL;
        ib_conn->qp       = NULL;
        ib_conn->cma_id   = NULL;
        kfree(ib_conn->page_vec);

        if (ib_conn->login_buf) {
                if (ib_conn->login_req_dma)
                        ib_dma_unmap_single(ib_conn->device->ib_device,
                                ib_conn->login_req_dma,
                                ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
                if (ib_conn->login_resp_dma)
                        ib_dma_unmap_single(ib_conn->device->ib_device,
                                ib_conn->login_resp_dma,
                                ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
                kfree(ib_conn->login_buf);
        }

        return 0;
}

/**
 * based on the resolved device node GUID, see if there is an already
 * allocated device for this IB device. If there is no such device, create one.
 */
static
struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
{
        struct iser_device *device;

        mutex_lock(&ig.device_list_mutex);

        list_for_each_entry(device, &ig.device_list, ig_list)
                /* find if there's a match using the node GUID */
                if (device->ib_device->node_guid == cma_id->device->node_guid)
                        goto inc_refcnt;

        device = kzalloc(sizeof *device, GFP_KERNEL);
        if (device == NULL)
                goto out;

        /* assign this ib_device to the new iser device */
        device->ib_device = cma_id->device;
        /* init the device and link it into ig device list */
        if (iser_create_device_ib_res(device)) {
                kfree(device);
                device = NULL;
                goto out;
        }
        list_add(&device->ig_list, &ig.device_list);

inc_refcnt:
        device->refcount++;
out:
        mutex_unlock(&ig.device_list_mutex);
        return device;
}

/* if there's no demand for this device, release it */
static void iser_device_try_release(struct iser_device *device)
{
        mutex_lock(&ig.device_list_mutex);
        device->refcount--;
        iser_err("device %p refcount %d\n", device, device->refcount);
        if (!device->refcount) {
                iser_free_device_ib_res(device);
                list_del(&device->ig_list);
                kfree(device);
        }
        mutex_unlock(&ig.device_list_mutex);
}

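/**
 * atomically compare the connection state with comp and, on a match,
 * exchange it with exch; returns non-zero if the exchange took place
 */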
static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
                                     enum iser_ib_conn_state comp,
                                     enum iser_ib_conn_state exch)
{
        int ret;

        spin_lock_bh(&ib_conn->lock);
        if ((ret = (ib_conn->state == comp)))
                ib_conn->state = exch;
        spin_unlock_bh(&ib_conn->lock);
        return ret;
}

/**
 * Frees all conn objects and deallocs conn descriptor
 */
static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
{
        struct iser_device  *device = ib_conn->device;

        BUG_ON(ib_conn->state != ISER_CONN_DOWN);

        mutex_lock(&ig.connlist_mutex);
        list_del(&ib_conn->conn_list);
        mutex_unlock(&ig.connlist_mutex);
        iser_free_rx_descriptors(ib_conn);
        iser_free_ib_conn_res(ib_conn, can_destroy_id);
        ib_conn->device = NULL;
        /* on EVENT_ADDR_ERROR there's no device yet for this conn */
        if (device != NULL)
                iser_device_try_release(device);
        iscsi_destroy_endpoint(ib_conn->ep);
}

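/* take a reference on the iser connection */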
void iser_conn_get(struct iser_conn *ib_conn)
{
        atomic_inc(&ib_conn->refcount);
}

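/* release a reference; the last put releases the connection objects */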
int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id)
{
        if (atomic_dec_and_test(&ib_conn->refcount)) {
                iser_conn_release(ib_conn, can_destroy_id);
                return 1;
        }
        return 0;
}

/**
 * triggers the start of the disconnect procedures and waits for them to be done
 */
void iser_conn_terminate(struct iser_conn *ib_conn)
{
        int err = 0;

        /* change the ib conn state only if the conn is UP, however always call
         * rdma_disconnect since this is the only way to cause the CMA to change
         * the QP state to ERROR
         */

        iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
        err = rdma_disconnect(ib_conn->cma_id);
        if (err)
                iser_err("Failed to disconnect, conn: 0x%p err %d\n",
                         ib_conn, err);

        wait_event_interruptible(ib_conn->wait,
                                 ib_conn->state == ISER_CONN_DOWN);

        iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
}

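/* flag the connection DOWN, wake up waiters and deref the ib conn's cma id */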
static int iser_connect_error(struct rdma_cm_id *cma_id)
{
        struct iser_conn *ib_conn;
        ib_conn = (struct iser_conn *)cma_id->context;

        ib_conn->state = ISER_CONN_DOWN;
        wake_up_interruptible(&ib_conn->wait);
        return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
}

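/* ADDR_RESOLVED handler - bind the conn to an iser device, then resolve the route */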
static int iser_addr_handler(struct rdma_cm_id *cma_id)
{
        struct iser_device *device;
        struct iser_conn   *ib_conn;
        int    ret;

        device = iser_device_find_by_ib_device(cma_id);
        if (!device) {
                iser_err("device lookup/creation failed\n");
                return iser_connect_error(cma_id);
        }

        ib_conn = (struct iser_conn *)cma_id->context;
        ib_conn->device = device;

        ret = rdma_resolve_route(cma_id, 1000);
        if (ret) {
                iser_err("resolve route failed: %d\n", ret);
                return iser_connect_error(cma_id);
        }

        return 0;
}

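/* ROUTE_RESOLVED handler - create the conn IB resources and issue the CM connect request */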
static int iser_route_handler(struct rdma_cm_id *cma_id)
{
        struct rdma_conn_param conn_param;
        int    ret;

        ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
        if (ret)
                goto failure;

        memset(&conn_param, 0, sizeof conn_param);
        conn_param.responder_resources = 4;
        conn_param.initiator_depth     = 1;
        conn_param.retry_count         = 7;
        conn_param.rnr_retry_count     = 6;

        ret = rdma_connect(cma_id, &conn_param);
        if (ret) {
                iser_err("failure connecting: %d\n", ret);
                goto failure;
        }

        return 0;
failure:
        return iser_connect_error(cma_id);
}

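/* ESTABLISHED handler - mark the connection UP and wake up its waiters */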
static void iser_connected_handler(struct rdma_cm_id *cma_id)
{
        struct iser_conn *ib_conn;

        ib_conn = (struct iser_conn *)cma_id->context;
        ib_conn->state = ISER_CONN_UP;
        wake_up_interruptible(&ib_conn->wait);
}

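/* DISCONNECTED/DEVICE_REMOVAL/ADDR_CHANGE handler - notifies the iSCSI
 * layer of the failure and completes the termination if no posts are pending */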
static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
        struct iser_conn *ib_conn;
        int ret;

        ib_conn = (struct iser_conn *)cma_id->context;

        /* getting here when the state is UP means that the conn is being *
         * terminated asynchronously from the iSCSI layer's perspective.  */
        if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
                                      ISER_CONN_TERMINATING))
                iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
                                   ISCSI_ERR_CONN_FAILED);

        /* Complete the termination process if no posts are pending */
        if (ib_conn->post_recv_buf_count == 0 &&
            (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
                ib_conn->state = ISER_CONN_DOWN;
                wake_up_interruptible(&ib_conn->wait);
        }

        ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
        return ret;
}

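/* the RDMA CM event handler registered on the connection's cma id */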
static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
        int ret = 0;

        iser_err("event %d status %d conn %p id %p\n",
                event->event, event->status, cma_id->context, cma_id);

        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
                ret = iser_addr_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ROUTE_RESOLVED:
                ret = iser_route_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
                iser_connected_handler(cma_id);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
        case RDMA_CM_EVENT_ROUTE_ERROR:
        case RDMA_CM_EVENT_CONNECT_ERROR:
        case RDMA_CM_EVENT_UNREACHABLE:
        case RDMA_CM_EVENT_REJECTED:
                ret = iser_connect_error(cma_id);
                break;
        case RDMA_CM_EVENT_DISCONNECTED:
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
        case RDMA_CM_EVENT_ADDR_CHANGE:
                ret = iser_disconnected_handler(cma_id);
                break;
        default:
                iser_err("Unexpected RDMA CM event (%d)\n", event->event);
                break;
        }
        return ret;
}

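/* one-time initialization of the iser connection's state, counters, locks and lists */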
void iser_conn_init(struct iser_conn *ib_conn)
{
        ib_conn->state = ISER_CONN_INIT;
        init_waitqueue_head(&ib_conn->wait);
        ib_conn->post_recv_buf_count = 0;
        atomic_set(&ib_conn->post_send_buf_count, 0);
        atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */
        INIT_LIST_HEAD(&ib_conn->conn_list);
        spin_lock_init(&ib_conn->lock);
}

/**
 * starts the process of connecting to the target;
 * sleeps until the connection is established or rejected
 */
int iser_connect(struct iser_conn   *ib_conn,
                 struct sockaddr_in *src_addr,
                 struct sockaddr_in *dst_addr,
                 int                 non_blocking)
{
        struct sockaddr *src, *dst;
        int err = 0;

        sprintf(ib_conn->name, "%pI4:%d",
                &dst_addr->sin_addr.s_addr, dst_addr->sin_port);

        /* the device is known only --after-- address resolution */
        ib_conn->device = NULL;

        iser_err("connecting to: %pI4, port 0x%x\n",
                 &dst_addr->sin_addr, dst_addr->sin_port);

        ib_conn->state = ISER_CONN_PENDING;

        iser_conn_get(ib_conn); /* ref ib conn's cma id */
        ib_conn->cma_id = rdma_create_id(iser_cma_handler,
                                             (void *)ib_conn,
                                             RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(ib_conn->cma_id)) {
                err = PTR_ERR(ib_conn->cma_id);
                iser_err("rdma_create_id failed: %d\n", err);
                goto id_failure;
        }

        src = (struct sockaddr *)src_addr;
        dst = (struct sockaddr *)dst_addr;
        err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
        if (err) {
                iser_err("rdma_resolve_addr failed: %d\n", err);
                goto addr_failure;
        }

        if (!non_blocking) {
                wait_event_interruptible(ib_conn->wait,
                                         (ib_conn->state != ISER_CONN_PENDING));

                if (ib_conn->state != ISER_CONN_UP) {
                        err = -EIO;
                        goto connect_failure;
                }
        }

        mutex_lock(&ig.connlist_mutex);
        list_add(&ib_conn->conn_list, &ig.connlist);
        mutex_unlock(&ig.connlist_mutex);
        return 0;

id_failure:
        ib_conn->cma_id = NULL;
addr_failure:
        ib_conn->state = ISER_CONN_DOWN;
connect_failure:
        iser_conn_release(ib_conn, 1);
        return err;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
int iser_reg_page_vec(struct iser_conn     *ib_conn,
                      struct iser_page_vec *page_vec,
                      struct iser_mem_reg  *mem_reg)
{
        struct ib_pool_fmr *mem;
        u64                io_addr;
        u64                *page_list;
        int                status;

        page_list = page_vec->pages;
        io_addr   = page_list[0];

        mem  = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
                                    page_list,
                                    page_vec->length,
                                    io_addr);

        if (IS_ERR(mem)) {
                status = (int)PTR_ERR(mem);
                iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
                return status;
        }

        mem_reg->lkey  = mem->fmr->lkey;
        mem_reg->rkey  = mem->fmr->rkey;
        mem_reg->len   = page_vec->length * SIZE_4K;
        mem_reg->va    = io_addr;
        mem_reg->is_fmr = 1;
        mem_reg->mem_h = (void *)mem;

        mem_reg->va   += page_vec->offset;
        mem_reg->len   = page_vec->data_size;

        iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
                 "entry[0]: (0x%08lx,%ld)] -> "
                 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
                 page_vec, page_vec->length,
                 (unsigned long)page_vec->pages[0],
                 (unsigned long)page_vec->data_size,
                 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
                 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
        return 0;
}

/**
 * Unregister (previously registered) memory.
 */
void iser_unreg_mem(struct iser_mem_reg *reg)
{
        int ret;

        iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

        ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
        if (ret)
                iser_err("ib_fmr_pool_unmap failed %d\n", ret);

        reg->mem_h = NULL;
}

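/* post a single receive work request for the login response buffer */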
int iser_post_recvl(struct iser_conn *ib_conn)
{
        struct ib_recv_wr rx_wr, *rx_wr_failed;
        struct ib_sge     sge;
        int ib_ret;

        sge.addr   = ib_conn->login_resp_dma;
        sge.length = ISER_RX_LOGIN_SIZE;
        sge.lkey   = ib_conn->device->mr->lkey;

        rx_wr.wr_id   = (unsigned long)ib_conn->login_resp_buf;
        rx_wr.sg_list = &sge;
        rx_wr.num_sge = 1;
        rx_wr.next    = NULL;

        ib_conn->post_recv_buf_count++;
        ib_ret  = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
        if (ib_ret) {
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count--;
        }
        return ib_ret;
}

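/* post count receive work requests taken from the rx descriptors ring */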
int iser_post_recvm(struct iser_conn *ib_conn, int count)
{
        struct ib_recv_wr *rx_wr, *rx_wr_failed;
        int i, ib_ret;
        unsigned int my_rx_head = ib_conn->rx_desc_head;
        struct iser_rx_desc *rx_desc;

        for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
                rx_desc         = &ib_conn->rx_descs[my_rx_head];
                rx_wr->wr_id    = (unsigned long)rx_desc;
                rx_wr->sg_list  = &rx_desc->rx_sg;
                rx_wr->num_sge  = 1;
                rx_wr->next     = rx_wr + 1;
                my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
        }

        rx_wr--;
        rx_wr->next = NULL; /* mark end of work requests list */

        ib_conn->post_recv_buf_count += count;
        ib_ret  = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
        if (ib_ret) {
                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
                ib_conn->post_recv_buf_count -= count;
        } else
                ib_conn->rx_desc_head = my_rx_head;
        return ib_ret;
}

/**
 * iser_post_send - Initiate a Send DTO operation
 *
 * returns 0 on success, errno code on failure
 */
int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
{
        int               ib_ret;
        struct ib_send_wr send_wr, *send_wr_failed;

        ib_dma_sync_single_for_device(ib_conn->device->ib_device,
                tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

        send_wr.next       = NULL;
        send_wr.wr_id      = (unsigned long)tx_desc;
        send_wr.sg_list    = tx_desc->tx_sg;
        send_wr.num_sge    = tx_desc->num_sge;
        send_wr.opcode     = IB_WR_SEND;
        send_wr.send_flags = IB_SEND_SIGNALED;

        atomic_inc(&ib_conn->post_send_buf_count);

        ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
        if (ib_ret) {
                iser_err("ib_post_send failed, ret:%d\n", ib_ret);
                atomic_dec(&ib_conn->post_send_buf_count);
        }
        return ib_ret;
}

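/* handle an error completion - free DataOut descriptors and, once no posts
 * are pending on the QP, complete the connection termination */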
static void iser_handle_comp_error(struct iser_tx_desc *desc,
                                struct iser_conn *ib_conn)
{
        if (desc && desc->type == ISCSI_TX_DATAOUT)
                kmem_cache_free(ig.desc_cache, desc);

        if (ib_conn->post_recv_buf_count == 0 &&
            atomic_read(&ib_conn->post_send_buf_count) == 0) {
                /* getting here when the state is UP means that the conn is *
                 * being terminated asynchronously from the iSCSI layer's   *
                 * perspective.                                             */
                if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
                    ISER_CONN_TERMINATING))
                        iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
                                           ISCSI_ERR_CONN_FAILED);

                /* no more non-completed posts to the QP, complete the
                 * termination process without worrying about the disconnect event */
                ib_conn->state = ISER_CONN_DOWN;
                wake_up_interruptible(&ib_conn->wait);
        }
}

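/* drain the TX CQ; returns the number of reaped completions */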
static int iser_drain_tx_cq(struct iser_device  *device)
{
        struct ib_cq  *cq = device->tx_cq;
        struct ib_wc  wc;
        struct iser_tx_desc *tx_desc;
        struct iser_conn *ib_conn;
        int completed_tx = 0;

        while (ib_poll_cq(cq, 1, &wc) == 1) {
                tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
                ib_conn = wc.qp->qp_context;
                if (wc.status == IB_WC_SUCCESS) {
                        if (wc.opcode == IB_WC_SEND)
                                iser_snd_completion(tx_desc, ib_conn);
                        else
                                iser_err("expected opcode %d got %d\n",
                                        IB_WC_SEND, wc.opcode);
                } else {
                        iser_err("tx id %llx status %d vend_err %x\n",
                                wc.wr_id, wc.status, wc.vendor_err);
                        atomic_dec(&ib_conn->post_send_buf_count);
                        iser_handle_comp_error(tx_desc, ib_conn);
                }
                completed_tx++;
        }
        return completed_tx;
}

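/* RX CQ polling loop, runs in tasklet context; also drains the TX CQ every
 * 64 RX completions and once more when the RX CQ is empty */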
static void iser_cq_tasklet_fn(unsigned long data)
{
        struct iser_device  *device = (struct iser_device *)data;
        struct ib_cq        *cq = device->rx_cq;
        struct ib_wc        wc;
        struct iser_rx_desc *desc;
        unsigned long       xfer_len;
        struct iser_conn    *ib_conn;
        int completed_tx, completed_rx;
        completed_tx = completed_rx = 0;

        while (ib_poll_cq(cq, 1, &wc) == 1) {
                desc     = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
                BUG_ON(desc == NULL);
                ib_conn = wc.qp->qp_context;
                if (wc.status == IB_WC_SUCCESS) {
                        if (wc.opcode == IB_WC_RECV) {
                                xfer_len = (unsigned long)wc.byte_len;
                                iser_rcv_completion(desc, xfer_len, ib_conn);
                        } else
                                iser_err("expected opcode %d got %d\n",
                                        IB_WC_RECV, wc.opcode);
                } else {
                        if (wc.status != IB_WC_WR_FLUSH_ERR)
                                iser_err("rx id %llx status %d vend_err %x\n",
                                        wc.wr_id, wc.status, wc.vendor_err);
                        ib_conn->post_recv_buf_count--;
                        iser_handle_comp_error(NULL, ib_conn);
                }
                completed_rx++;
                if (!(completed_rx & 63))
                        completed_tx += iser_drain_tx_cq(device);
        }
        /* it is assumed here that arming the CQ only once it is empty *
         * would not cause interrupts to be missed                     */
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

        completed_tx += iser_drain_tx_cq(device);
        iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}

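/* RX CQ completion event handler - defers the actual polling to the tasklet */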
static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
        struct iser_device  *device = (struct iser_device *)cq_context;

        tasklet_schedule(&device->cq_tasklet);
}