linux/drivers/infiniband/ulp/iser/iser_verbs.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
   3 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33#include <linux/kernel.h>
  34#include <linux/module.h>
  35#include <linux/delay.h>
  36
  37#include "iscsi_iser.h"
  38
  39#define ISCSI_ISER_MAX_CONN     8
  40#define ISER_MAX_CQ_LEN         ((ISER_QP_MAX_RECV_DTOS + \
  41                                ISER_QP_MAX_REQ_DTOS) *   \
  42                                 ISCSI_ISER_MAX_CONN)
  43
  44static void iser_cq_tasklet_fn(unsigned long data);
  45static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
  46
  47static void iser_cq_event_callback(struct ib_event *cause, void *context)
  48{
  49        iser_err("got cq event %d \n", cause->event);
  50}
  51
  52static void iser_qp_event_callback(struct ib_event *cause, void *context)
  53{
  54        iser_err("got qp event %d\n",cause->event);
  55}
  56
  57/**
  58 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
  59 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
  60 * the adapator.
  61 *
  62 * returns 0 on success, -1 on failure
  63 */
  64static int iser_create_device_ib_res(struct iser_device *device)
  65{
  66        device->pd = ib_alloc_pd(device->ib_device);
  67        if (IS_ERR(device->pd))
  68                goto pd_err;
  69
  70        device->cq = ib_create_cq(device->ib_device,
  71                                  iser_cq_callback,
  72                                  iser_cq_event_callback,
  73                                  (void *)device,
  74                                  ISER_MAX_CQ_LEN, 0);
  75        if (IS_ERR(device->cq))
  76                goto cq_err;
  77
  78        if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP))
  79                goto cq_arm_err;
  80
  81        tasklet_init(&device->cq_tasklet,
  82                     iser_cq_tasklet_fn,
  83                     (unsigned long)device);
  84
  85        device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
  86                                   IB_ACCESS_REMOTE_WRITE |
  87                                   IB_ACCESS_REMOTE_READ);
  88        if (IS_ERR(device->mr))
  89                goto dma_mr_err;
  90
  91        return 0;
  92
  93dma_mr_err:
  94        tasklet_kill(&device->cq_tasklet);
  95cq_arm_err:
  96        ib_destroy_cq(device->cq);
  97cq_err:
  98        ib_dealloc_pd(device->pd);
  99pd_err:
 100        iser_err("failed to allocate an IB resource\n");
 101        return -1;
 102}
 103
 104/**
 105 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 106 * CQ and PD created with the device associated with the adapator.
 107 */
 108static void iser_free_device_ib_res(struct iser_device *device)
 109{
 110        BUG_ON(device->mr == NULL);
 111
 112        tasklet_kill(&device->cq_tasklet);
 113
 114        (void)ib_dereg_mr(device->mr);
 115        (void)ib_destroy_cq(device->cq);
 116        (void)ib_dealloc_pd(device->pd);
 117
 118        device->mr = NULL;
 119        device->cq = NULL;
 120        device->pd = NULL;
 121}
 122
 123/**
 124 * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
 125 *
 126 * returns 0 on success, -1 on failure
 127 */
 128static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
 129{
 130        struct iser_device      *device;
 131        struct ib_qp_init_attr  init_attr;
 132        int                     ret;
 133        struct ib_fmr_pool_param params;
 134
 135        BUG_ON(ib_conn->device == NULL);
 136
 137        device = ib_conn->device;
 138
 139        ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
 140                                    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
 141                                    GFP_KERNEL);
 142        if (!ib_conn->page_vec) {
 143                ret = -ENOMEM;
 144                goto alloc_err;
 145        }
 146        ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
 147
 148        params.page_shift        = SHIFT_4K;
 149        /* when the first/last SG element are not start/end *
 150         * page aligned, the map whould be of N+1 pages     */
 151        params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
 152        /* make the pool size twice the max number of SCSI commands *
 153         * the ML is expected to queue, watermark for unmap at 50%  */
 154        params.pool_size         = ISCSI_DEF_XMIT_CMDS_MAX * 2;
 155        params.dirty_watermark   = ISCSI_DEF_XMIT_CMDS_MAX;
 156        params.cache             = 0;
 157        params.flush_function    = NULL;
 158        params.access            = (IB_ACCESS_LOCAL_WRITE  |
 159                                    IB_ACCESS_REMOTE_WRITE |
 160                                    IB_ACCESS_REMOTE_READ);
 161
 162        ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
 163        if (IS_ERR(ib_conn->fmr_pool)) {
 164                ret = PTR_ERR(ib_conn->fmr_pool);
 165                goto fmr_pool_err;
 166        }
 167
 168        memset(&init_attr, 0, sizeof init_attr);
 169
 170        init_attr.event_handler = iser_qp_event_callback;
 171        init_attr.qp_context    = (void *)ib_conn;
 172        init_attr.send_cq       = device->cq;
 173        init_attr.recv_cq       = device->cq;
 174        init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
 175        init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
 176        init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN;
 177        init_attr.cap.max_recv_sge = 2;
 178        init_attr.sq_sig_type   = IB_SIGNAL_REQ_WR;
 179        init_attr.qp_type       = IB_QPT_RC;
 180
 181        ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
 182        if (ret)
 183                goto qp_err;
 184
 185        ib_conn->qp = ib_conn->cma_id->qp;
 186        iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
 187                 ib_conn, ib_conn->cma_id,
 188                 ib_conn->fmr_pool, ib_conn->cma_id->qp);
 189        return ret;
 190
 191qp_err:
 192        (void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
 193fmr_pool_err:
 194        kfree(ib_conn->page_vec);
 195alloc_err:
 196        iser_err("unable to alloc mem or create resource, err %d\n", ret);
 197        return ret;
 198}
 199
 200/**
 201 * releases the FMR pool, QP and CMA ID objects, returns 0 on success,
 202 * -1 on failure
 203 */
 204static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
 205{
 206        BUG_ON(ib_conn == NULL);
 207
 208        iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
 209                 ib_conn, ib_conn->cma_id,
 210                 ib_conn->fmr_pool, ib_conn->qp);
 211
 212        /* qp is created only once both addr & route are resolved */
 213        if (ib_conn->fmr_pool != NULL)
 214                ib_destroy_fmr_pool(ib_conn->fmr_pool);
 215
 216        if (ib_conn->qp != NULL)
 217                rdma_destroy_qp(ib_conn->cma_id);
 218
 219        if (ib_conn->cma_id != NULL)
 220                rdma_destroy_id(ib_conn->cma_id);
 221
 222        ib_conn->fmr_pool = NULL;
 223        ib_conn->qp       = NULL;
 224        ib_conn->cma_id   = NULL;
 225        kfree(ib_conn->page_vec);
 226
 227        return 0;
 228}
 229
 230/**
 231 * based on the resolved device node GUID see if there already allocated
 232 * device for this device. If there's no such, create one.
 233 */
 234static
 235struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
 236{
 237        struct iser_device *device;
 238
 239        mutex_lock(&ig.device_list_mutex);
 240
 241        list_for_each_entry(device, &ig.device_list, ig_list)
 242                /* find if there's a match using the node GUID */
 243                if (device->ib_device->node_guid == cma_id->device->node_guid)
 244                        goto inc_refcnt;
 245
 246        device = kzalloc(sizeof *device, GFP_KERNEL);
 247        if (device == NULL)
 248                goto out;
 249
 250        /* assign this device to the device */
 251        device->ib_device = cma_id->device;
 252        /* init the device and link it into ig device list */
 253        if (iser_create_device_ib_res(device)) {
 254                kfree(device);
 255                device = NULL;
 256                goto out;
 257        }
 258        list_add(&device->ig_list, &ig.device_list);
 259
 260inc_refcnt:
 261        device->refcount++;
 262out:
 263        mutex_unlock(&ig.device_list_mutex);
 264        return device;
 265}
 266
 267/* if there's no demand for this device, release it */
 268static void iser_device_try_release(struct iser_device *device)
 269{
 270        mutex_lock(&ig.device_list_mutex);
 271        device->refcount--;
 272        iser_err("device %p refcount %d\n",device,device->refcount);
 273        if (!device->refcount) {
 274                iser_free_device_ib_res(device);
 275                list_del(&device->ig_list);
 276                kfree(device);
 277        }
 278        mutex_unlock(&ig.device_list_mutex);
 279}
 280
 281int iser_conn_state_comp(struct iser_conn *ib_conn,
 282                        enum iser_ib_conn_state comp)
 283{
 284        int ret;
 285
 286        spin_lock_bh(&ib_conn->lock);
 287        ret = (ib_conn->state == comp);
 288        spin_unlock_bh(&ib_conn->lock);
 289        return ret;
 290}
 291
 292static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
 293                                     enum iser_ib_conn_state comp,
 294                                     enum iser_ib_conn_state exch)
 295{
 296        int ret;
 297
 298        spin_lock_bh(&ib_conn->lock);
 299        if ((ret = (ib_conn->state == comp)))
 300                ib_conn->state = exch;
 301        spin_unlock_bh(&ib_conn->lock);
 302        return ret;
 303}
 304
 305/**
 306 * Frees all conn objects and deallocs conn descriptor
 307 */
 308static void iser_conn_release(struct iser_conn *ib_conn)
 309{
 310        struct iser_device  *device = ib_conn->device;
 311
 312        BUG_ON(ib_conn->state != ISER_CONN_DOWN);
 313
 314        mutex_lock(&ig.connlist_mutex);
 315        list_del(&ib_conn->conn_list);
 316        mutex_unlock(&ig.connlist_mutex);
 317
 318        iser_free_ib_conn_res(ib_conn);
 319        ib_conn->device = NULL;
 320        /* on EVENT_ADDR_ERROR there's no device yet for this conn */
 321        if (device != NULL)
 322                iser_device_try_release(device);
 323        if (ib_conn->iser_conn)
 324                ib_conn->iser_conn->ib_conn = NULL;
 325        iscsi_destroy_endpoint(ib_conn->ep);
 326}
 327
 328void iser_conn_get(struct iser_conn *ib_conn)
 329{
 330        atomic_inc(&ib_conn->refcount);
 331}
 332
 333void iser_conn_put(struct iser_conn *ib_conn)
 334{
 335        if (atomic_dec_and_test(&ib_conn->refcount))
 336                iser_conn_release(ib_conn);
 337}
 338
 339/**
 340 * triggers start of the disconnect procedures and wait for them to be done
 341 */
 342void iser_conn_terminate(struct iser_conn *ib_conn)
 343{
 344        int err = 0;
 345
 346        /* change the ib conn state only if the conn is UP, however always call
 347         * rdma_disconnect since this is the only way to cause the CMA to change
 348         * the QP state to ERROR
 349         */
 350
 351        iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
 352        err = rdma_disconnect(ib_conn->cma_id);
 353        if (err)
 354                iser_err("Failed to disconnect, conn: 0x%p err %d\n",
 355                         ib_conn,err);
 356
 357        wait_event_interruptible(ib_conn->wait,
 358                                 ib_conn->state == ISER_CONN_DOWN);
 359
 360        iser_conn_put(ib_conn);
 361}
 362
 363static void iser_connect_error(struct rdma_cm_id *cma_id)
 364{
 365        struct iser_conn *ib_conn;
 366        ib_conn = (struct iser_conn *)cma_id->context;
 367
 368        ib_conn->state = ISER_CONN_DOWN;
 369        wake_up_interruptible(&ib_conn->wait);
 370}
 371
 372static void iser_addr_handler(struct rdma_cm_id *cma_id)
 373{
 374        struct iser_device *device;
 375        struct iser_conn   *ib_conn;
 376        int    ret;
 377
 378        device = iser_device_find_by_ib_device(cma_id);
 379        if (!device) {
 380                iser_err("device lookup/creation failed\n");
 381                iser_connect_error(cma_id);
 382                return;
 383        }
 384
 385        ib_conn = (struct iser_conn *)cma_id->context;
 386        ib_conn->device = device;
 387
 388        ret = rdma_resolve_route(cma_id, 1000);
 389        if (ret) {
 390                iser_err("resolve route failed: %d\n", ret);
 391                iser_connect_error(cma_id);
 392        }
 393}
 394
 395static void iser_route_handler(struct rdma_cm_id *cma_id)
 396{
 397        struct rdma_conn_param conn_param;
 398        int    ret;
 399
 400        ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
 401        if (ret)
 402                goto failure;
 403
 404        memset(&conn_param, 0, sizeof conn_param);
 405        conn_param.responder_resources = 4;
 406        conn_param.initiator_depth     = 1;
 407        conn_param.retry_count         = 7;
 408        conn_param.rnr_retry_count     = 6;
 409
 410        ret = rdma_connect(cma_id, &conn_param);
 411        if (ret) {
 412                iser_err("failure connecting: %d\n", ret);
 413                goto failure;
 414        }
 415
 416        return;
 417failure:
 418        iser_connect_error(cma_id);
 419}
 420
 421static void iser_connected_handler(struct rdma_cm_id *cma_id)
 422{
 423        struct iser_conn *ib_conn;
 424
 425        ib_conn = (struct iser_conn *)cma_id->context;
 426        ib_conn->state = ISER_CONN_UP;
 427        wake_up_interruptible(&ib_conn->wait);
 428}
 429
 430static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
 431{
 432        struct iser_conn *ib_conn;
 433
 434        ib_conn = (struct iser_conn *)cma_id->context;
 435        ib_conn->disc_evt_flag = 1;
 436
 437        /* getting here when the state is UP means that the conn is being *
 438         * terminated asynchronously from the iSCSI layer's perspective.  */
 439        if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
 440                                      ISER_CONN_TERMINATING))
 441                iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
 442                                   ISCSI_ERR_CONN_FAILED);
 443
 444        /* Complete the termination process if no posts are pending */
 445        if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) &&
 446            (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
 447                ib_conn->state = ISER_CONN_DOWN;
 448                wake_up_interruptible(&ib_conn->wait);
 449        }
 450}
 451
 452static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 453{
 454        int ret = 0;
 455
 456        iser_err("event %d conn %p id %p\n",event->event,cma_id->context,cma_id);
 457
 458        switch (event->event) {
 459        case RDMA_CM_EVENT_ADDR_RESOLVED:
 460                iser_addr_handler(cma_id);
 461                break;
 462        case RDMA_CM_EVENT_ROUTE_RESOLVED:
 463                iser_route_handler(cma_id);
 464                break;
 465        case RDMA_CM_EVENT_ESTABLISHED:
 466                iser_connected_handler(cma_id);
 467                break;
 468        case RDMA_CM_EVENT_ADDR_ERROR:
 469        case RDMA_CM_EVENT_ROUTE_ERROR:
 470        case RDMA_CM_EVENT_CONNECT_ERROR:
 471        case RDMA_CM_EVENT_UNREACHABLE:
 472        case RDMA_CM_EVENT_REJECTED:
 473                iser_err("event: %d, error: %d\n", event->event, event->status);
 474                iser_connect_error(cma_id);
 475                break;
 476        case RDMA_CM_EVENT_DISCONNECTED:
 477        case RDMA_CM_EVENT_DEVICE_REMOVAL:
 478        case RDMA_CM_EVENT_ADDR_CHANGE:
 479                iser_disconnected_handler(cma_id);
 480                break;
 481        default:
 482                iser_err("Unexpected RDMA CM event (%d)\n", event->event);
 483                break;
 484        }
 485        return ret;
 486}
 487
 488void iser_conn_init(struct iser_conn *ib_conn)
 489{
 490        ib_conn->state = ISER_CONN_INIT;
 491        init_waitqueue_head(&ib_conn->wait);
 492        atomic_set(&ib_conn->post_recv_buf_count, 0);
 493        atomic_set(&ib_conn->post_send_buf_count, 0);
 494        atomic_set(&ib_conn->unexpected_pdu_count, 0);
 495        atomic_set(&ib_conn->refcount, 1);
 496        INIT_LIST_HEAD(&ib_conn->conn_list);
 497        spin_lock_init(&ib_conn->lock);
 498}
 499
 500 /**
 501 * starts the process of connecting to the target
 502 * sleeps untill the connection is established or rejected
 503 */
 504int iser_connect(struct iser_conn   *ib_conn,
 505                 struct sockaddr_in *src_addr,
 506                 struct sockaddr_in *dst_addr,
 507                 int                 non_blocking)
 508{
 509        struct sockaddr *src, *dst;
 510        int err = 0;
 511
 512        sprintf(ib_conn->name, "%pI4:%d",
 513                &dst_addr->sin_addr.s_addr, dst_addr->sin_port);
 514
 515        /* the device is known only --after-- address resolution */
 516        ib_conn->device = NULL;
 517
 518        iser_err("connecting to: %pI4, port 0x%x\n",
 519                 &dst_addr->sin_addr, dst_addr->sin_port);
 520
 521        ib_conn->state = ISER_CONN_PENDING;
 522
 523        ib_conn->cma_id = rdma_create_id(iser_cma_handler,
 524                                             (void *)ib_conn,
 525                                             RDMA_PS_TCP);
 526        if (IS_ERR(ib_conn->cma_id)) {
 527                err = PTR_ERR(ib_conn->cma_id);
 528                iser_err("rdma_create_id failed: %d\n", err);
 529                goto id_failure;
 530        }
 531
 532        src = (struct sockaddr *)src_addr;
 533        dst = (struct sockaddr *)dst_addr;
 534        err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
 535        if (err) {
 536                iser_err("rdma_resolve_addr failed: %d\n", err);
 537                goto addr_failure;
 538        }
 539
 540        if (!non_blocking) {
 541                wait_event_interruptible(ib_conn->wait,
 542                                         (ib_conn->state != ISER_CONN_PENDING));
 543
 544                if (ib_conn->state != ISER_CONN_UP) {
 545                        err =  -EIO;
 546                        goto connect_failure;
 547                }
 548        }
 549
 550        mutex_lock(&ig.connlist_mutex);
 551        list_add(&ib_conn->conn_list, &ig.connlist);
 552        mutex_unlock(&ig.connlist_mutex);
 553        return 0;
 554
 555id_failure:
 556        ib_conn->cma_id = NULL;
 557addr_failure:
 558        ib_conn->state = ISER_CONN_DOWN;
 559connect_failure:
 560        iser_conn_release(ib_conn);
 561        return err;
 562}
 563
 564/**
 565 * iser_reg_page_vec - Register physical memory
 566 *
 567 * returns: 0 on success, errno code on failure
 568 */
 569int iser_reg_page_vec(struct iser_conn     *ib_conn,
 570                      struct iser_page_vec *page_vec,
 571                      struct iser_mem_reg  *mem_reg)
 572{
 573        struct ib_pool_fmr *mem;
 574        u64                io_addr;
 575        u64                *page_list;
 576        int                status;
 577
 578        page_list = page_vec->pages;
 579        io_addr   = page_list[0];
 580
 581        mem  = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
 582                                    page_list,
 583                                    page_vec->length,
 584                                    io_addr);
 585
 586        if (IS_ERR(mem)) {
 587                status = (int)PTR_ERR(mem);
 588                iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
 589                return status;
 590        }
 591
 592        mem_reg->lkey  = mem->fmr->lkey;
 593        mem_reg->rkey  = mem->fmr->rkey;
 594        mem_reg->len   = page_vec->length * SIZE_4K;
 595        mem_reg->va    = io_addr;
 596        mem_reg->is_fmr = 1;
 597        mem_reg->mem_h = (void *)mem;
 598
 599        mem_reg->va   += page_vec->offset;
 600        mem_reg->len   = page_vec->data_size;
 601
 602        iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
 603                 "entry[0]: (0x%08lx,%ld)] -> "
 604                 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
 605                 page_vec, page_vec->length,
 606                 (unsigned long)page_vec->pages[0],
 607                 (unsigned long)page_vec->data_size,
 608                 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
 609                 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
 610        return 0;
 611}
 612
 613/**
 614 * Unregister (previosuly registered) memory.
 615 */
 616void iser_unreg_mem(struct iser_mem_reg *reg)
 617{
 618        int ret;
 619
 620        iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
 621
 622        ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
 623        if (ret)
 624                iser_err("ib_fmr_pool_unmap failed %d\n", ret);
 625
 626        reg->mem_h = NULL;
 627}
 628
 629/**
 630 * iser_dto_to_iov - builds IOV from a dto descriptor
 631 */
 632static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len)
 633{
 634        int                  i;
 635        struct ib_sge        *sge;
 636        struct iser_regd_buf *regd_buf;
 637
 638        if (dto->regd_vector_len > iov_len) {
 639                iser_err("iov size %d too small for posting dto of len %d\n",
 640                         iov_len, dto->regd_vector_len);
 641                BUG();
 642        }
 643
 644        for (i = 0; i < dto->regd_vector_len; i++) {
 645                sge         = &iov[i];
 646                regd_buf  = dto->regd[i];
 647
 648                sge->addr   = regd_buf->reg.va;
 649                sge->length = regd_buf->reg.len;
 650                sge->lkey   = regd_buf->reg.lkey;
 651
 652                if (dto->used_sz[i] > 0)  /* Adjust size */
 653                        sge->length = dto->used_sz[i];
 654
 655                /* offset and length should not exceed the regd buf length */
 656                if (sge->length + dto->offset[i] > regd_buf->reg.len) {
 657                        iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:"
 658                                 "%ld in dto:0x%p [%d], va:0x%08lX\n",
 659                                 (unsigned long)sge->length, dto->offset[i],
 660                                 (unsigned long)regd_buf->reg.len, dto, i,
 661                                 (unsigned long)sge->addr);
 662                        BUG();
 663                }
 664
 665                sge->addr += dto->offset[i]; /* Adjust offset */
 666        }
 667}
 668
 669/**
 670 * iser_post_recv - Posts a receive buffer.
 671 *
 672 * returns 0 on success, -1 on failure
 673 */
 674int iser_post_recv(struct iser_desc *rx_desc)
 675{
 676        int               ib_ret, ret_val = 0;
 677        struct ib_recv_wr recv_wr, *recv_wr_failed;
 678        struct ib_sge     iov[2];
 679        struct iser_conn  *ib_conn;
 680        struct iser_dto   *recv_dto = &rx_desc->dto;
 681
 682        /* Retrieve conn */
 683        ib_conn = recv_dto->ib_conn;
 684
 685        iser_dto_to_iov(recv_dto, iov, 2);
 686
 687        recv_wr.next    = NULL;
 688        recv_wr.sg_list = iov;
 689        recv_wr.num_sge = recv_dto->regd_vector_len;
 690        recv_wr.wr_id   = (unsigned long)rx_desc;
 691
 692        atomic_inc(&ib_conn->post_recv_buf_count);
 693        ib_ret  = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed);
 694        if (ib_ret) {
 695                iser_err("ib_post_recv failed ret=%d\n", ib_ret);
 696                atomic_dec(&ib_conn->post_recv_buf_count);
 697                ret_val = -1;
 698        }
 699
 700        return ret_val;
 701}
 702
 703/**
 704 * iser_start_send - Initiate a Send DTO operation
 705 *
 706 * returns 0 on success, -1 on failure
 707 */
 708int iser_post_send(struct iser_desc *tx_desc)
 709{
 710        int               ib_ret, ret_val = 0;
 711        struct ib_send_wr send_wr, *send_wr_failed;
 712        struct ib_sge     iov[MAX_REGD_BUF_VECTOR_LEN];
 713        struct iser_conn  *ib_conn;
 714        struct iser_dto   *dto = &tx_desc->dto;
 715
 716        ib_conn = dto->ib_conn;
 717
 718        iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN);
 719
 720        send_wr.next       = NULL;
 721        send_wr.wr_id      = (unsigned long)tx_desc;
 722        send_wr.sg_list    = iov;
 723        send_wr.num_sge    = dto->regd_vector_len;
 724        send_wr.opcode     = IB_WR_SEND;
 725        send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0;
 726
 727        atomic_inc(&ib_conn->post_send_buf_count);
 728
 729        ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
 730        if (ib_ret) {
 731                iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n",
 732                         dto, dto->regd_vector_len);
 733                iser_err("ib_post_send failed, ret:%d\n", ib_ret);
 734                atomic_dec(&ib_conn->post_send_buf_count);
 735                ret_val = -1;
 736        }
 737
 738        return ret_val;
 739}
 740
 741static void iser_handle_comp_error(struct iser_desc *desc)
 742{
 743        struct iser_dto  *dto     = &desc->dto;
 744        struct iser_conn *ib_conn = dto->ib_conn;
 745
 746        iser_dto_buffs_release(dto);
 747
 748        if (desc->type == ISCSI_RX) {
 749                kfree(desc->data);
 750                kmem_cache_free(ig.desc_cache, desc);
 751                atomic_dec(&ib_conn->post_recv_buf_count);
 752        } else { /* type is TX control/command/dataout */
 753                if (desc->type == ISCSI_TX_DATAOUT)
 754                        kmem_cache_free(ig.desc_cache, desc);
 755                atomic_dec(&ib_conn->post_send_buf_count);
 756        }
 757
 758        if (atomic_read(&ib_conn->post_recv_buf_count) == 0 &&
 759            atomic_read(&ib_conn->post_send_buf_count) == 0) {
 760                /* getting here when the state is UP means that the conn is *
 761                 * being terminated asynchronously from the iSCSI layer's   *
 762                 * perspective.                                             */
 763                if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
 764                    ISER_CONN_TERMINATING))
 765                        iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
 766                                           ISCSI_ERR_CONN_FAILED);
 767
 768                /* complete the termination process if disconnect event was delivered *
 769                 * note there are no more non completed posts to the QP               */
 770                if (ib_conn->disc_evt_flag) {
 771                        ib_conn->state = ISER_CONN_DOWN;
 772                        wake_up_interruptible(&ib_conn->wait);
 773                }
 774        }
 775}
 776
 777static void iser_cq_tasklet_fn(unsigned long data)
 778{
 779         struct iser_device  *device = (struct iser_device *)data;
 780         struct ib_cq        *cq = device->cq;
 781         struct ib_wc        wc;
 782         struct iser_desc    *desc;
 783         unsigned long       xfer_len;
 784
 785        while (ib_poll_cq(cq, 1, &wc) == 1) {
 786                desc     = (struct iser_desc *) (unsigned long) wc.wr_id;
 787                BUG_ON(desc == NULL);
 788
 789                if (wc.status == IB_WC_SUCCESS) {
 790                        if (desc->type == ISCSI_RX) {
 791                                xfer_len = (unsigned long)wc.byte_len;
 792                                iser_rcv_completion(desc, xfer_len);
 793                        } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */
 794                                iser_snd_completion(desc);
 795                } else {
 796                        iser_err("comp w. error op %d status %d\n",desc->type,wc.status);
 797                        iser_handle_comp_error(desc);
 798                }
 799        }
 800        /* #warning "it is assumed here that arming CQ only once its empty" *
 801         * " would not cause interrupts to be missed"                       */
 802        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 803}
 804
 805static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
 806{
 807        struct iser_device  *device = (struct iser_device *)cq_context;
 808
 809        tasklet_schedule(&device->cq_tasklet);
 810}
 811