linux/net/9p/trans_rdma.c
/*
 * linux/net/9p/trans_rdma.c
 *
 * RDMA transport layer based on the trans_fd.c implementation.
 *
 *  Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
 *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
 *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
 *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2
 *  as published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to:
 *  Free Software Foundation
 *  51 Franklin Street, Fifth Floor
 *  Boston, MA  02111-1301  USA
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/in.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/kthread.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/un.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/parser.h>
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>

#define P9_PORT                 5640
#define P9_RDMA_SQ_DEPTH        32
#define P9_RDMA_RQ_DEPTH        32
#define P9_RDMA_SEND_SGE        4
#define P9_RDMA_RECV_SGE        4
#define P9_RDMA_IRD             0
#define P9_RDMA_ORD             0
#define P9_RDMA_TIMEOUT         30000           /* 30 seconds */
#define P9_RDMA_MAXSIZE         (1024*1024)     /* 1MB */
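
/* Note: P9_RDMA_IRD and P9_RDMA_ORD are handed to rdma_connect() below as
 * conn_param.responder_resources and conn_param.initiator_depth.  Zero
 * appears sufficient here since this transport only posts SEND/RECV work
 * requests and never issues RDMA Reads or atomics toward the server.
 */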

/**
 * struct p9_trans_rdma - RDMA transport instance
 *
 * @state: tracks the transport state machine for connection setup and tear down
 * @cm_id: The RDMA CM ID
 * @pd: Protection Domain pointer
 * @qp: Queue Pair pointer
 * @cq: Completion Queue pointer
 * @timeout: Number of uSecs to wait for connection management events
 * @privport: Whether a privileged port may be used
 * @port: The port to use
 * @sq_depth: The depth of the Send Queue
 * @sq_sem: Semaphore for the SQ
 * @rq_depth: The depth of the Receive Queue.
 * @rq_sem: Semaphore for the RQ
 * @excess_rc: Amount of posted Receive Contexts without a pending request.
 *             See rdma_request()
 * @addr: The remote peer's address
 * @req_lock: Protects the active request list
 * @cm_done: Completion event for connection management tracking
 */
struct p9_trans_rdma {
        enum {
                P9_RDMA_INIT,
                P9_RDMA_ADDR_RESOLVED,
                P9_RDMA_ROUTE_RESOLVED,
                P9_RDMA_CONNECTED,
                P9_RDMA_FLUSHING,
                P9_RDMA_CLOSING,
                P9_RDMA_CLOSED,
        } state;
        struct rdma_cm_id *cm_id;
        struct ib_pd *pd;
        struct ib_qp *qp;
        struct ib_cq *cq;
        long timeout;
        bool privport;
        u16 port;
        int sq_depth;
        struct semaphore sq_sem;
        int rq_depth;
        struct semaphore rq_sem;
        atomic_t excess_rc;
        struct sockaddr_in addr;
        spinlock_t req_lock;

        struct completion cm_done;
};
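
/* Typical state progression, driven by p9_cm_event_handler() below:
 *
 *   P9_RDMA_INIT -> P9_RDMA_ADDR_RESOLVED -> P9_RDMA_ROUTE_RESOLVED
 *                -> P9_RDMA_CONNECTED
 *
 * Errors and disconnects move the transport to P9_RDMA_FLUSHING,
 * P9_RDMA_CLOSING or P9_RDMA_CLOSED and mark the client Disconnected.
 */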

struct p9_rdma_req;

/**
 * struct p9_rdma_context - Keeps track of in-process WR
 *
 * @cqe: completion queue entry
 * @busa: Bus address to unmap when the WR completes
 * @req: Keeps track of requests (send)
 * @rc: Keeps track of replies (receive)
 */
struct p9_rdma_context {
        struct ib_cqe cqe;
        dma_addr_t busa;
        union {
                struct p9_req_t *req;
                struct p9_fcall *rc;
        };
};

/**
 * struct p9_rdma_opts - Collection of mount options
 * @port: port of connection
 * @privport: Whether a privileged port may be used
 * @sq_depth: The requested depth of the SQ. This really doesn't need
 * to be any deeper than the number of threads used in the client
 * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
 * @timeout: Time to wait in msecs for CM events
 */
struct p9_rdma_opts {
        short port;
        bool privport;
        int sq_depth;
        int rq_depth;
        long timeout;
};

/*
 * Option Parsing (code inspired by NFS code)
 */
enum {
        /* Options that take integer arguments */
        Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout,
        /* Options that take no argument */
        Opt_privport,
        Opt_err,
};

static match_table_t tokens = {
        {Opt_port, "port=%u"},
        {Opt_sq_depth, "sq=%u"},
        {Opt_rq_depth, "rq=%u"},
        {Opt_timeout, "timeout=%u"},
        {Opt_privport, "privport"},
        {Opt_err, NULL},
};
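
/* These options arrive in the mount option string, e.g. (assuming the usual
 * v9fs mount syntax, with the server address as the mount source):
 *
 *   mount -t 9p -o trans=rdma,port=5640,sq=32,rq=32,privport 192.168.1.1 /mnt
 *
 * Unrecognized tokens are silently skipped by parse_opts() below, and the
 * P9_RDMA_* defaults above cover anything left unspecified.
 */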

static int p9_rdma_show_options(struct seq_file *m, struct p9_client *clnt)
{
        struct p9_trans_rdma *rdma = clnt->trans;

        if (rdma->port != P9_PORT)
                seq_printf(m, ",port=%u", rdma->port);
        if (rdma->sq_depth != P9_RDMA_SQ_DEPTH)
                seq_printf(m, ",sq=%u", rdma->sq_depth);
        if (rdma->rq_depth != P9_RDMA_RQ_DEPTH)
                seq_printf(m, ",rq=%u", rdma->rq_depth);
        if (rdma->timeout != P9_RDMA_TIMEOUT)
                seq_printf(m, ",timeout=%lu", rdma->timeout);
        if (rdma->privport)
                seq_puts(m, ",privport");
        return 0;
}

/**
 * parse_opts - parse mount options into rdma options structure
 * @params: options string passed from mount
 * @opts: rdma transport-specific structure to parse options into
 *
 * Returns 0 upon success, -ERRNO upon failure
 */
static int parse_opts(char *params, struct p9_rdma_opts *opts)
{
        char *p;
        substring_t args[MAX_OPT_ARGS];
        int option;
        char *options, *tmp_options;

        opts->port = P9_PORT;
        opts->sq_depth = P9_RDMA_SQ_DEPTH;
        opts->rq_depth = P9_RDMA_RQ_DEPTH;
        opts->timeout = P9_RDMA_TIMEOUT;
        opts->privport = false;

        if (!params)
                return 0;

        tmp_options = kstrdup(params, GFP_KERNEL);
        if (!tmp_options) {
                p9_debug(P9_DEBUG_ERROR,
                         "failed to allocate copy of option string\n");
                return -ENOMEM;
        }
        options = tmp_options;

        while ((p = strsep(&options, ",")) != NULL) {
                int token;
                int r;
                if (!*p)
                        continue;
                token = match_token(p, tokens, args);
                if ((token != Opt_err) && (token != Opt_privport)) {
                        r = match_int(&args[0], &option);
                        if (r < 0) {
                                p9_debug(P9_DEBUG_ERROR,
                                         "integer field, but no integer?\n");
                                continue;
                        }
                }
                switch (token) {
                case Opt_port:
                        opts->port = option;
                        break;
                case Opt_sq_depth:
                        opts->sq_depth = option;
                        break;
                case Opt_rq_depth:
                        opts->rq_depth = option;
                        break;
                case Opt_timeout:
                        opts->timeout = option;
                        break;
                case Opt_privport:
                        opts->privport = true;
                        break;
                default:
                        continue;
                }
        }
        /* RQ must be at least as large as the SQ */
        opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
        kfree(tmp_options);
        return 0;
}

static int
p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
        struct p9_client *c = id->context;
        struct p9_trans_rdma *rdma = c->trans;
        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
                BUG_ON(rdma->state != P9_RDMA_INIT);
                rdma->state = P9_RDMA_ADDR_RESOLVED;
                break;

        case RDMA_CM_EVENT_ROUTE_RESOLVED:
                BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED);
                rdma->state = P9_RDMA_ROUTE_RESOLVED;
                break;

        case RDMA_CM_EVENT_ESTABLISHED:
                BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED);
                rdma->state = P9_RDMA_CONNECTED;
                break;

        case RDMA_CM_EVENT_DISCONNECTED:
                if (rdma)
                        rdma->state = P9_RDMA_CLOSED;
                if (c)
                        c->status = Disconnected;
                break;

        case RDMA_CM_EVENT_TIMEWAIT_EXIT:
                break;

        case RDMA_CM_EVENT_ADDR_CHANGE:
        case RDMA_CM_EVENT_ROUTE_ERROR:
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
        case RDMA_CM_EVENT_MULTICAST_JOIN:
        case RDMA_CM_EVENT_MULTICAST_ERROR:
        case RDMA_CM_EVENT_REJECTED:
        case RDMA_CM_EVENT_CONNECT_REQUEST:
        case RDMA_CM_EVENT_CONNECT_RESPONSE:
        case RDMA_CM_EVENT_CONNECT_ERROR:
        case RDMA_CM_EVENT_ADDR_ERROR:
        case RDMA_CM_EVENT_UNREACHABLE:
                c->status = Disconnected;
                rdma_disconnect(rdma->cm_id);
                break;
        default:
                BUG();
        }
        complete(&rdma->cm_done);
        return 0;
}

static void
recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
        struct p9_client *client = cq->cq_context;
        struct p9_trans_rdma *rdma = client->trans;
        struct p9_rdma_context *c =
                container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
        struct p9_req_t *req;
        int err = 0;
        int16_t tag;

        req = NULL;
        ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
                                                         DMA_FROM_DEVICE);

        if (wc->status != IB_WC_SUCCESS)
                goto err_out;

        err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
        if (err)
                goto err_out;

        req = p9_tag_lookup(client, tag);
        if (!req)
                goto err_out;

        /* Check that we have not yet received a reply for this request.
         */
        if (unlikely(req->rc)) {
                pr_err("Duplicate reply for request %d\n", tag);
                goto err_out;
        }

        req->rc = c->rc;
        p9_client_cb(client, req, REQ_STATUS_RCVD);

 out:
        up(&rdma->rq_sem);
        kfree(c);
        return;

 err_out:
        p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
                        req, err, wc->status);
        rdma->state = P9_RDMA_FLUSHING;
        client->status = Disconnected;
        goto out;
}

static void
send_done(struct ib_cq *cq, struct ib_wc *wc)
{
        struct p9_client *client = cq->cq_context;
        struct p9_trans_rdma *rdma = client->trans;
        struct p9_rdma_context *c =
                container_of(wc->wr_cqe, struct p9_rdma_context, cqe);

        ib_dma_unmap_single(rdma->cm_id->device,
                            c->busa, c->req->tc->size,
                            DMA_TO_DEVICE);
        up(&rdma->sq_sem);
        kfree(c);
}

static void qp_event_handler(struct ib_event *event, void *context)
{
        p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n",
                 event->event, context);
}

static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
        if (!rdma)
                return;

        if (rdma->qp && !IS_ERR(rdma->qp))
                ib_destroy_qp(rdma->qp);

        if (rdma->pd && !IS_ERR(rdma->pd))
                ib_dealloc_pd(rdma->pd);

        if (rdma->cq && !IS_ERR(rdma->cq))
                ib_free_cq(rdma->cq);

        if (rdma->cm_id && !IS_ERR(rdma->cm_id))
                rdma_destroy_id(rdma->cm_id);

        kfree(rdma);
}

static int
post_recv(struct p9_client *client, struct p9_rdma_context *c)
{
        struct p9_trans_rdma *rdma = client->trans;
        struct ib_recv_wr wr, *bad_wr;
        struct ib_sge sge;

        c->busa = ib_dma_map_single(rdma->cm_id->device,
                                    c->rc->sdata, client->msize,
                                    DMA_FROM_DEVICE);
        if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
                goto error;

        c->cqe.done = recv_done;

        sge.addr = c->busa;
        sge.length = client->msize;
        sge.lkey = rdma->pd->local_dma_lkey;

        wr.next = NULL;
        wr.wr_cqe = &c->cqe;
        wr.sg_list = &sge;
        wr.num_sge = 1;
        return ib_post_recv(rdma->qp, &wr, &bad_wr);

 error:
        p9_debug(P9_DEBUG_ERROR, "EIO\n");
        return -EIO;
}

static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
        struct p9_trans_rdma *rdma = client->trans;
        struct ib_send_wr wr, *bad_wr;
        struct ib_sge sge;
        int err = 0;
        unsigned long flags;
        struct p9_rdma_context *c = NULL;
        struct p9_rdma_context *rpl_context = NULL;

        /* When an error occurs between posting the recv and the send,
         * there will be a receive context posted without a pending request.
         * Since there is no way to "un-post" it, we remember it and skip
         * post_recv() for the next request.
         * So here, see if we are this `next request' and need to absorb an
         * excess rc.  If yes, then drop and free our own, and do not
         * post_recv().
         */
        if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
                if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
                        /* Got one ! */
                        kfree(req->rc);
                        req->rc = NULL;
                        goto dont_need_post_recv;
                } else {
                        /* We raced and lost. */
                        atomic_inc(&rdma->excess_rc);
                }
        }

        /* Allocate an fcall for the reply */
        rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
        if (!rpl_context) {
                err = -ENOMEM;
                goto recv_error;
        }
        rpl_context->rc = req->rc;

        /*
         * Post a receive buffer for this request. We need to ensure
         * there is a reply buffer available for every outstanding
         * request. A flushed request can result in no reply for an
         * outstanding request, so we must keep a count to avoid
         * overflowing the RQ.
         */
        if (down_interruptible(&rdma->rq_sem)) {
                err = -EINTR;
                goto recv_error;
        }

        err = post_recv(client, rpl_context);
        if (err) {
                p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
                goto recv_error;
        }
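
        /* Note: rq_sem was taken just above and is released again in
         * recv_done() when this receive buffer completes; sq_sem is handled
         * the same way around ib_post_send()/send_done() below.  This bounds
         * the number of outstanding WRs to rq_depth/sq_depth.
         */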
        /* remove posted receive buffer from request structure */
        req->rc = NULL;

dont_need_post_recv:
        /* Post the request */
        c = kmalloc(sizeof *c, GFP_NOFS);
        if (!c) {
                err = -ENOMEM;
                goto send_error;
        }
        c->req = req;

        c->busa = ib_dma_map_single(rdma->cm_id->device,
                                    c->req->tc->sdata, c->req->tc->size,
                                    DMA_TO_DEVICE);
        if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
                err = -EIO;
                goto send_error;
        }

        c->cqe.done = send_done;

        sge.addr = c->busa;
        sge.length = c->req->tc->size;
        sge.lkey = rdma->pd->local_dma_lkey;

        wr.next = NULL;
        wr.wr_cqe = &c->cqe;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;
        wr.sg_list = &sge;
        wr.num_sge = 1;

        if (down_interruptible(&rdma->sq_sem)) {
                err = -EINTR;
                goto send_error;
        }

        /* Mark request as `sent' *before* we actually send it,
         * because doing it after could erase the REQ_STATUS_RCVD
         * status in case of a very fast reply.
         */
        req->status = REQ_STATUS_SENT;
        err = ib_post_send(rdma->qp, &wr, &bad_wr);
        if (err)
                goto send_error;

        /* Success */
        return 0;

 /* Handle errors that happened during or while preparing the send: */
 send_error:
        req->status = REQ_STATUS_ERROR;
        kfree(c);
        p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);

        /* We already did post_recv(), but the send failed, so there is now
         * one posted receive buffer in excess.
         */
        atomic_inc(&rdma->excess_rc);
        return err;

 /* Handle errors that happened during or while preparing post_recv(): */
 recv_error:
        kfree(rpl_context);
        spin_lock_irqsave(&rdma->req_lock, flags);
        if (rdma->state < P9_RDMA_CLOSING) {
                rdma->state = P9_RDMA_CLOSING;
                spin_unlock_irqrestore(&rdma->req_lock, flags);
                rdma_disconnect(rdma->cm_id);
        } else
                spin_unlock_irqrestore(&rdma->req_lock, flags);
        return err;
}

static void rdma_close(struct p9_client *client)
{
        struct p9_trans_rdma *rdma;

        if (!client)
                return;

        rdma = client->trans;
        if (!rdma)
                return;

        client->status = Disconnected;
        rdma_disconnect(rdma->cm_id);
        rdma_destroy_trans(rdma);
}

/**
 * alloc_rdma - Allocate and initialize the rdma transport structure
 * @opts: Mount options structure
 */
static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
{
        struct p9_trans_rdma *rdma;

        rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL);
        if (!rdma)
                return NULL;

        rdma->port = opts->port;
        rdma->privport = opts->privport;
        rdma->sq_depth = opts->sq_depth;
        rdma->rq_depth = opts->rq_depth;
        rdma->timeout = opts->timeout;
        spin_lock_init(&rdma->req_lock);
        init_completion(&rdma->cm_done);
        sema_init(&rdma->sq_sem, rdma->sq_depth);
        sema_init(&rdma->rq_sem, rdma->rq_depth);
        atomic_set(&rdma->excess_rc, 0);

        return rdma;
}

static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
{
        /* Nothing to do here.
         * We will take care of it (if we have to) in rdma_cancelled()
         */
        return 1;
}

/* A request has been fully flushed without a reply.
 * That means we have posted one buffer in excess.
 */
static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
{
        struct p9_trans_rdma *rdma = client->trans;
        atomic_inc(&rdma->excess_rc);
        return 0;
}
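
/* Both rdma_cancelled() above and the send_error path in rdma_request()
 * leave one posted receive buffer with no request that will consume it;
 * excess_rc records those buffers so that a later rdma_request() can reuse
 * an already-posted one instead of posting another.
 */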

static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
{
        struct sockaddr_in cl = {
                .sin_family = AF_INET,
                .sin_addr.s_addr = htonl(INADDR_ANY),
        };
        int port, err = -EINVAL;

        for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
                cl.sin_port = htons((ushort)port);
                err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
                if (err != -EADDRINUSE)
                        break;
        }
        return err;
}

/**
 * rdma_create_trans - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
        int err;
        struct p9_rdma_opts opts;
        struct p9_trans_rdma *rdma;
        struct rdma_conn_param conn_param;
        struct ib_qp_init_attr qp_attr;

        /* Parse the transport specific mount options */
        err = parse_opts(args, &opts);
        if (err < 0)
                return err;

        /* Create and initialize the RDMA transport structure */
        rdma = alloc_rdma(&opts);
        if (!rdma)
                return -ENOMEM;

        /* Create the RDMA CM ID */
        rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
                                     RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(rdma->cm_id))
                goto error;

        /* Associate the client with the transport */
        client->trans = rdma;

        /* Bind to a privileged port if we need to */
        if (opts.privport) {
                err = p9_rdma_bind_privport(rdma);
                if (err < 0) {
                        pr_err("%s (%d): problem binding to privport: %d\n",
                               __func__, task_pid_nr(current), -err);
                        goto error;
                }
        }

        /* Resolve the server's address */
        rdma->addr.sin_family = AF_INET;
        rdma->addr.sin_addr.s_addr = in_aton(addr);
        rdma->addr.sin_port = htons(opts.port);
        err = rdma_resolve_addr(rdma->cm_id, NULL,
                                (struct sockaddr *)&rdma->addr,
                                rdma->timeout);
        if (err)
                goto error;
        err = wait_for_completion_interruptible(&rdma->cm_done);
        if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
                goto error;

        /* Resolve the route to the server */
        err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
        if (err)
                goto error;
        err = wait_for_completion_interruptible(&rdma->cm_done);
        if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
                goto error;

        /* Create the Completion Queue */
        rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
                        opts.sq_depth + opts.rq_depth + 1,
                        0, IB_POLL_SOFTIRQ);
        if (IS_ERR(rdma->cq))
                goto error;

        /* Create the Protection Domain */
        rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0);
        if (IS_ERR(rdma->pd))
                goto error;

        /* Create the Queue Pair */
        memset(&qp_attr, 0, sizeof qp_attr);
        qp_attr.event_handler = qp_event_handler;
        qp_attr.qp_context = client;
        qp_attr.cap.max_send_wr = opts.sq_depth;
        qp_attr.cap.max_recv_wr = opts.rq_depth;
        qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
        qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
        qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        qp_attr.qp_type = IB_QPT_RC;
        qp_attr.send_cq = rdma->cq;
        qp_attr.recv_cq = rdma->cq;
        err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
        if (err)
                goto error;
        rdma->qp = rdma->cm_id->qp;

        /* Request a connection */
        memset(&conn_param, 0, sizeof(conn_param));
        conn_param.private_data = NULL;
        conn_param.private_data_len = 0;
        conn_param.responder_resources = P9_RDMA_IRD;
        conn_param.initiator_depth = P9_RDMA_ORD;
        err = rdma_connect(rdma->cm_id, &conn_param);
        if (err)
                goto error;
        err = wait_for_completion_interruptible(&rdma->cm_done);
        if (err || (rdma->state != P9_RDMA_CONNECTED))
                goto error;

        client->status = Connected;

        return 0;

error:
        rdma_destroy_trans(rdma);
        return -ENOTCONN;
}

static struct p9_trans_module p9_rdma_trans = {
        .name = "rdma",
        .maxsize = P9_RDMA_MAXSIZE,
        .def = 0,
        .owner = THIS_MODULE,
        .create = rdma_create_trans,
        .close = rdma_close,
        .request = rdma_request,
        .cancel = rdma_cancel,
        .cancelled = rdma_cancelled,
        .show_options = p9_rdma_show_options,
};

/**
 * p9_trans_rdma_init - Register the 9P RDMA transport driver
 */
static int __init p9_trans_rdma_init(void)
{
        v9fs_register_trans(&p9_rdma_trans);
        return 0;
}

static void __exit p9_trans_rdma_exit(void)
{
        v9fs_unregister_trans(&p9_rdma_trans);
}

module_init(p9_trans_rdma_init);
module_exit(p9_trans_rdma_exit);

MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("RDMA Transport for 9P");
MODULE_LICENSE("Dual BSD/GPL");