linux/drivers/infiniband/core/cma.c
   1/*
   2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
   3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
   4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
   5 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
   6 *
   7 * This software is available to you under a choice of one of two
   8 * licenses.  You may choose to be licensed under the terms of the GNU
   9 * General Public License (GPL) Version 2, available from the file
  10 * COPYING in the main directory of this source tree, or the
  11 * OpenIB.org BSD license below:
  12 *
  13 *     Redistribution and use in source and binary forms, with or
  14 *     without modification, are permitted provided that the following
  15 *     conditions are met:
  16 *
  17 *      - Redistributions of source code must retain the above
  18 *        copyright notice, this list of conditions and the following
  19 *        disclaimer.
  20 *
  21 *      - Redistributions in binary form must reproduce the above
  22 *        copyright notice, this list of conditions and the following
  23 *        disclaimer in the documentation and/or other materials
  24 *        provided with the distribution.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33 * SOFTWARE.
  34 */
  35
  36#include <linux/completion.h>
  37#include <linux/in.h>
  38#include <linux/in6.h>
  39#include <linux/mutex.h>
  40#include <linux/random.h>
  41#include <linux/idr.h>
  42#include <linux/inetdevice.h>
  43#include <linux/slab.h>
  44#include <linux/module.h>
  45#include <net/route.h>
  46
  47#include <net/tcp.h>
  48#include <net/ipv6.h>
  49
  50#include <rdma/rdma_cm.h>
  51#include <rdma/rdma_cm_ib.h>
  52#include <rdma/rdma_netlink.h>
  53#include <rdma/ib.h>
  54#include <rdma/ib_cache.h>
  55#include <rdma/ib_cm.h>
  56#include <rdma/ib_sa.h>
  57#include <rdma/iw_cm.h>
  58
  59MODULE_AUTHOR("Sean Hefty");
  60MODULE_DESCRIPTION("Generic RDMA CM Agent");
  61MODULE_LICENSE("Dual BSD/GPL");
  62
  63#define CMA_CM_RESPONSE_TIMEOUT 20
  64#define CMA_MAX_CM_RETRIES 15
  65#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
  66#define CMA_IBOE_PACKET_LIFETIME 18
  67
  68static void cma_add_one(struct ib_device *device);
  69static void cma_remove_one(struct ib_device *device);
  70
  71static struct ib_client cma_client = {
  72        .name   = "cma",
  73        .add    = cma_add_one,
  74        .remove = cma_remove_one
  75};
  76
  77static struct ib_sa_client sa_client;
  78static struct rdma_addr_client addr_client;
  79static LIST_HEAD(dev_list);
  80static LIST_HEAD(listen_any_list);
  81static DEFINE_MUTEX(lock);
  82static struct workqueue_struct *cma_wq;
  83static DEFINE_IDR(tcp_ps);
  84static DEFINE_IDR(udp_ps);
  85static DEFINE_IDR(ipoib_ps);
  86static DEFINE_IDR(ib_ps);
  87
  88struct cma_device {
  89        struct list_head        list;
  90        struct ib_device        *device;
  91        struct completion       comp;
  92        atomic_t                refcount;
  93        struct list_head        id_list;
  94};
  95
  96struct rdma_bind_list {
  97        struct idr              *ps;
  98        struct hlist_head       owners;
  99        unsigned short          port;
 100};
 101
 102enum {
 103        CMA_OPTION_AFONLY,
 104};
 105
 106/*
  107 * Device removal can occur at any time, so we need extra handling to
 108 * serialize notifying the user of device removal with other callbacks.
 109 * We do this by disabling removal notification while a callback is in process,
 110 * and reporting it after the callback completes.
 111 */
 112struct rdma_id_private {
 113        struct rdma_cm_id       id;
 114
 115        struct rdma_bind_list   *bind_list;
 116        struct hlist_node       node;
 117        struct list_head        list; /* listen_any_list or cma_device.list */
 118        struct list_head        listen_list; /* per device listens */
 119        struct cma_device       *cma_dev;
 120        struct list_head        mc_list;
 121
 122        int                     internal_id;
 123        enum rdma_cm_state      state;
 124        spinlock_t              lock;
 125        struct mutex            qp_mutex;
 126
 127        struct completion       comp;
 128        atomic_t                refcount;
 129        struct mutex            handler_mutex;
 130
 131        int                     backlog;
 132        int                     timeout_ms;
 133        struct ib_sa_query      *query;
 134        int                     query_id;
 135        union {
 136                struct ib_cm_id *ib;
 137                struct iw_cm_id *iw;
 138        } cm_id;
 139
 140        u32                     seq_num;
 141        u32                     qkey;
 142        u32                     qp_num;
 143        pid_t                   owner;
 144        u32                     options;
 145        u8                      srq;
 146        u8                      tos;
 147        u8                      reuseaddr;
 148        u8                      afonly;
 149};
 150
 151struct cma_multicast {
 152        struct rdma_id_private *id_priv;
 153        union {
 154                struct ib_sa_multicast *ib;
 155        } multicast;
 156        struct list_head        list;
 157        void                    *context;
 158        struct sockaddr_storage addr;
 159        struct kref             mcref;
 160};
 161
 162struct cma_work {
 163        struct work_struct      work;
 164        struct rdma_id_private  *id;
 165        enum rdma_cm_state      old_state;
 166        enum rdma_cm_state      new_state;
 167        struct rdma_cm_event    event;
 168};
 169
 170struct cma_ndev_work {
 171        struct work_struct      work;
 172        struct rdma_id_private  *id;
 173        struct rdma_cm_event    event;
 174};
 175
 176struct iboe_mcast_work {
 177        struct work_struct       work;
 178        struct rdma_id_private  *id;
 179        struct cma_multicast    *mc;
 180};
 181
 182union cma_ip_addr {
 183        struct in6_addr ip6;
 184        struct {
 185                __be32 pad[3];
 186                __be32 addr;
 187        } ip4;
 188};
 189
 190struct cma_hdr {
 191        u8 cma_version;
 192        u8 ip_version;  /* IP version: 7:4 */
 193        __be16 port;
 194        union cma_ip_addr src_addr;
 195        union cma_ip_addr dst_addr;
 196};
 197
 198#define CMA_VERSION 0x00
 199
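/*
 * State helpers: cma_comp() tests the current state, cma_comp_exch() moves
 * to a new state only if the id is currently in the expected state, and
 * cma_exch() unconditionally swaps in a new state and returns the old one.
 * All three take id_priv->lock so state transitions stay atomic with
 * respect to concurrent callbacks.
 */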
 200static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
 201{
 202        unsigned long flags;
 203        int ret;
 204
 205        spin_lock_irqsave(&id_priv->lock, flags);
 206        ret = (id_priv->state == comp);
 207        spin_unlock_irqrestore(&id_priv->lock, flags);
 208        return ret;
 209}
 210
 211static int cma_comp_exch(struct rdma_id_private *id_priv,
 212                         enum rdma_cm_state comp, enum rdma_cm_state exch)
 213{
 214        unsigned long flags;
 215        int ret;
 216
 217        spin_lock_irqsave(&id_priv->lock, flags);
 218        if ((ret = (id_priv->state == comp)))
 219                id_priv->state = exch;
 220        spin_unlock_irqrestore(&id_priv->lock, flags);
 221        return ret;
 222}
 223
 224static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
 225                                   enum rdma_cm_state exch)
 226{
 227        unsigned long flags;
 228        enum rdma_cm_state old;
 229
 230        spin_lock_irqsave(&id_priv->lock, flags);
 231        old = id_priv->state;
 232        id_priv->state = exch;
 233        spin_unlock_irqrestore(&id_priv->lock, flags);
 234        return old;
 235}
 236
 237static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
 238{
 239        return hdr->ip_version >> 4;
 240}
 241
 242static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
 243{
 244        hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 245}
 246
 247static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 248                              struct cma_device *cma_dev)
 249{
 250        atomic_inc(&cma_dev->refcount);
 251        id_priv->cma_dev = cma_dev;
 252        id_priv->id.device = cma_dev->device;
 253        id_priv->id.route.addr.dev_addr.transport =
 254                rdma_node_get_transport(cma_dev->device->node_type);
 255        list_add_tail(&id_priv->list, &cma_dev->id_list);
 256}
 257
 258static inline void cma_deref_dev(struct cma_device *cma_dev)
 259{
 260        if (atomic_dec_and_test(&cma_dev->refcount))
 261                complete(&cma_dev->comp);
 262}
 263
 264static inline void release_mc(struct kref *kref)
 265{
 266        struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
 267
 268        kfree(mc->multicast.ib);
 269        kfree(mc);
 270}
 271
 272static void cma_release_dev(struct rdma_id_private *id_priv)
 273{
 274        mutex_lock(&lock);
 275        list_del(&id_priv->list);
 276        cma_deref_dev(id_priv->cma_dev);
 277        id_priv->cma_dev = NULL;
 278        mutex_unlock(&lock);
 279}
 280
 281static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
 282{
 283        return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
 284}
 285
 286static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
 287{
 288        return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
 289}
 290
 291static inline unsigned short cma_family(struct rdma_id_private *id_priv)
 292{
 293        return id_priv->id.route.addr.src_addr.ss_family;
 294}
 295
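/*
 * Resolve the QKey used for unreliable datagram traffic.  An explicit,
 * non-conflicting qkey from the caller wins; otherwise RDMA_PS_UDP and
 * RDMA_PS_IB fall back to RDMA_UDP_QKEY, and RDMA_PS_IPOIB looks up the
 * multicast member record matching the device's MGID to take its qkey.
 */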
 296static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
 297{
 298        struct ib_sa_mcmember_rec rec;
 299        int ret = 0;
 300
 301        if (id_priv->qkey) {
 302                if (qkey && id_priv->qkey != qkey)
 303                        return -EINVAL;
 304                return 0;
 305        }
 306
 307        if (qkey) {
 308                id_priv->qkey = qkey;
 309                return 0;
 310        }
 311
 312        switch (id_priv->id.ps) {
 313        case RDMA_PS_UDP:
 314        case RDMA_PS_IB:
 315                id_priv->qkey = RDMA_UDP_QKEY;
 316                break;
 317        case RDMA_PS_IPOIB:
 318                ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
 319                ret = ib_sa_get_mcmember_rec(id_priv->id.device,
 320                                             id_priv->id.port_num, &rec.mgid,
 321                                             &rec);
 322                if (!ret)
 323                        id_priv->qkey = be32_to_cpu(rec.qkey);
 324                break;
 325        default:
 326                break;
 327        }
 328        return ret;
 329}
 330
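/*
 * Scan the GID table of the given port for an exact match of @gid.
 * Returns 0 if the GID is present, -EADDRNOTAVAIL if it is not, or the
 * error from querying the port or GID table.
 */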
 331static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
 332{
 333        int i;
 334        int err;
 335        struct ib_port_attr props;
 336        union ib_gid tmp;
 337
 338        err = ib_query_port(device, port_num, &props);
 339        if (err)
 340                return err;
 341
 342        for (i = 0; i < props.gid_tbl_len; ++i) {
 343                err = ib_query_gid(device, port_num, i, &tmp);
 344                if (err)
 345                        return err;
 346                if (!memcmp(&tmp, gid, sizeof tmp))
 347                        return 0;
 348        }
 349
 350        return -EADDRNOTAVAIL;
 351}
 352
 353static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
 354{
 355        dev_addr->dev_type = ARPHRD_INFINIBAND;
 356        rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
 357        ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
 358}
 359
 360static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 361{
 362        int ret;
 363
 364        if (addr->sa_family != AF_IB) {
 365                ret = rdma_translate_ip(addr, dev_addr);
 366        } else {
 367                cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
 368                ret = 0;
 369        }
 370
 371        return ret;
 372}
 373
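/*
 * Bind the id to the cma_device/port whose GID table contains the source
 * GID derived from the resolved device address.  For IBoE (RoCE) ports the
 * GID is derived via iboe_addr_get_sgid(); for native IB ports it is taken
 * directly from the device address.  Called once the source address has
 * been resolved.
 */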
 374static int cma_acquire_dev(struct rdma_id_private *id_priv)
 375{
 376        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 377        struct cma_device *cma_dev;
 378        union ib_gid gid, iboe_gid;
 379        int ret = -ENODEV;
 380        u8 port;
 381        enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
 382                IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
 383
 384        if (dev_ll != IB_LINK_LAYER_INFINIBAND &&
 385            id_priv->id.ps == RDMA_PS_IPOIB)
 386                return -EINVAL;
 387
 388        mutex_lock(&lock);
 389        iboe_addr_get_sgid(dev_addr, &iboe_gid);
 390        memcpy(&gid, dev_addr->src_dev_addr +
 391               rdma_addr_gid_offset(dev_addr), sizeof gid);
 392        list_for_each_entry(cma_dev, &dev_list, list) {
 393                for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
 394                        if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
 395                                if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
 396                                    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
 397                                        ret = find_gid_port(cma_dev->device, &iboe_gid, port);
 398                                else
 399                                        ret = find_gid_port(cma_dev->device, &gid, port);
 400
 401                                if (!ret) {
 402                                        id_priv->id.port_num = port;
 403                                        goto out;
 404                                }
 405                        }
 406                }
 407        }
 408
 409out:
 410        if (!ret)
 411                cma_attach_to_dev(id_priv, cma_dev);
 412
 413        mutex_unlock(&lock);
 414        return ret;
 415}
 416
 417/*
 418 * Select the source IB device and address to reach the destination IB address.
 419 */
 420static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
 421{
 422        struct cma_device *cma_dev, *cur_dev;
 423        struct sockaddr_ib *addr;
 424        union ib_gid gid, sgid, *dgid;
 425        u16 pkey, index;
 426        u8 p;
 427        int i;
 428
 429        cma_dev = NULL;
 430        addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
 431        dgid = (union ib_gid *) &addr->sib_addr;
 432        pkey = ntohs(addr->sib_pkey);
 433
 434        list_for_each_entry(cur_dev, &dev_list, list) {
 435                if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
 436                        continue;
 437
 438                for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
 439                        if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
 440                                continue;
 441
 442                        for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) {
 443                                if (!memcmp(&gid, dgid, sizeof(gid))) {
 444                                        cma_dev = cur_dev;
 445                                        sgid = gid;
 446                                        id_priv->id.port_num = p;
 447                                        goto found;
 448                                }
 449
 450                                if (!cma_dev && (gid.global.subnet_prefix ==
 451                                                 dgid->global.subnet_prefix)) {
 452                                        cma_dev = cur_dev;
 453                                        sgid = gid;
 454                                        id_priv->id.port_num = p;
 455                                }
 456                        }
 457                }
 458        }
 459
 460        if (!cma_dev)
 461                return -ENODEV;
 462
 463found:
 464        cma_attach_to_dev(id_priv, cma_dev);
 465        addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
 466        memcpy(&addr->sib_addr, &sgid, sizeof sgid);
 467        cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
 468        return 0;
 469}
 470
 471static void cma_deref_id(struct rdma_id_private *id_priv)
 472{
 473        if (atomic_dec_and_test(&id_priv->refcount))
 474                complete(&id_priv->comp);
 475}
 476
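/*
 * Take handler_mutex and verify the id is still in the expected state.
 * On a state mismatch the mutex is dropped and -EINVAL is returned, so the
 * caller may only proceed (and later unlock) when 0 is returned.  This is
 * what serializes event callbacks against rdma_destroy_id().
 */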
 477static int cma_disable_callback(struct rdma_id_private *id_priv,
 478                                enum rdma_cm_state state)
 479{
 480        mutex_lock(&id_priv->handler_mutex);
 481        if (id_priv->state != state) {
 482                mutex_unlock(&id_priv->handler_mutex);
 483                return -EINVAL;
 484        }
 485        return 0;
 486}
 487
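/*
 * rdma_create_id - allocate a communication identifier.
 * @event_handler: callback invoked to report events on this id
 * @context: caller context returned with each event
 * @ps: RDMA port space (TCP, UDP, IPoIB, IB)
 * @qp_type: QP type the id will be associated with
 *
 * The id starts in RDMA_CM_IDLE with a single reference held by the
 * caller.  A typical consumer then resolves the address and route,
 * creates a QP with rdma_create_qp(), and connects or listens on the id.
 */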
 488struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 489                                  void *context, enum rdma_port_space ps,
 490                                  enum ib_qp_type qp_type)
 491{
 492        struct rdma_id_private *id_priv;
 493
 494        id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
 495        if (!id_priv)
 496                return ERR_PTR(-ENOMEM);
 497
 498        id_priv->owner = task_pid_nr(current);
 499        id_priv->state = RDMA_CM_IDLE;
 500        id_priv->id.context = context;
 501        id_priv->id.event_handler = event_handler;
 502        id_priv->id.ps = ps;
 503        id_priv->id.qp_type = qp_type;
 504        spin_lock_init(&id_priv->lock);
 505        mutex_init(&id_priv->qp_mutex);
 506        init_completion(&id_priv->comp);
 507        atomic_set(&id_priv->refcount, 1);
 508        mutex_init(&id_priv->handler_mutex);
 509        INIT_LIST_HEAD(&id_priv->listen_list);
 510        INIT_LIST_HEAD(&id_priv->mc_list);
 511        get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
 512
 513        return &id_priv->id;
 514}
 515EXPORT_SYMBOL(rdma_create_id);
 516
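/*
 * Bring a UD QP all the way to RTS: INIT (with attributes supplied by
 * rdma_init_qp_attr()), then RTR, then RTS with sq_psn 0.  UD QPs need no
 * peer information, so this can be done as soon as the QP is created.
 */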
 517static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 518{
 519        struct ib_qp_attr qp_attr;
 520        int qp_attr_mask, ret;
 521
 522        qp_attr.qp_state = IB_QPS_INIT;
 523        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 524        if (ret)
 525                return ret;
 526
 527        ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 528        if (ret)
 529                return ret;
 530
 531        qp_attr.qp_state = IB_QPS_RTR;
 532        ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
 533        if (ret)
 534                return ret;
 535
 536        qp_attr.qp_state = IB_QPS_RTS;
 537        qp_attr.sq_psn = 0;
 538        ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
 539
 540        return ret;
 541}
 542
 543static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 544{
 545        struct ib_qp_attr qp_attr;
 546        int qp_attr_mask, ret;
 547
 548        qp_attr.qp_state = IB_QPS_INIT;
 549        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 550        if (ret)
 551                return ret;
 552
 553        return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 554}
 555
 556int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 557                   struct ib_qp_init_attr *qp_init_attr)
 558{
 559        struct rdma_id_private *id_priv;
 560        struct ib_qp *qp;
 561        int ret;
 562
 563        id_priv = container_of(id, struct rdma_id_private, id);
 564        if (id->device != pd->device)
 565                return -EINVAL;
 566
 567        qp = ib_create_qp(pd, qp_init_attr);
 568        if (IS_ERR(qp))
 569                return PTR_ERR(qp);
 570
 571        if (id->qp_type == IB_QPT_UD)
 572                ret = cma_init_ud_qp(id_priv, qp);
 573        else
 574                ret = cma_init_conn_qp(id_priv, qp);
 575        if (ret)
 576                goto err;
 577
 578        id->qp = qp;
 579        id_priv->qp_num = qp->qp_num;
 580        id_priv->srq = (qp->srq != NULL);
 581        return 0;
 582err:
 583        ib_destroy_qp(qp);
 584        return ret;
 585}
 586EXPORT_SYMBOL(rdma_create_qp);
 587
 588void rdma_destroy_qp(struct rdma_cm_id *id)
 589{
 590        struct rdma_id_private *id_priv;
 591
 592        id_priv = container_of(id, struct rdma_id_private, id);
 593        mutex_lock(&id_priv->qp_mutex);
 594        ib_destroy_qp(id_priv->id.qp);
 595        id_priv->id.qp = NULL;
 596        mutex_unlock(&id_priv->qp_mutex);
 597}
 598EXPORT_SYMBOL(rdma_destroy_qp);
 599
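/*
 * QP state helpers used during connection establishment and teardown.
 * cma_modify_qp_rtr() walks the QP through INIT and into RTR,
 * cma_modify_qp_rts() moves it to RTS, and cma_modify_qp_err() forces it
 * into the error state.  Each is a no-op when the caller manages its own
 * QP (id_priv->id.qp == NULL).
 */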
 600static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
 601                             struct rdma_conn_param *conn_param)
 602{
 603        struct ib_qp_attr qp_attr;
 604        int qp_attr_mask, ret;
 605
 606        mutex_lock(&id_priv->qp_mutex);
 607        if (!id_priv->id.qp) {
 608                ret = 0;
 609                goto out;
 610        }
 611
 612        /* Need to update QP attributes from default values. */
 613        qp_attr.qp_state = IB_QPS_INIT;
 614        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 615        if (ret)
 616                goto out;
 617
 618        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 619        if (ret)
 620                goto out;
 621
 622        qp_attr.qp_state = IB_QPS_RTR;
 623        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 624        if (ret)
 625                goto out;
 626
 627        if (conn_param)
 628                qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
 629        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 630out:
 631        mutex_unlock(&id_priv->qp_mutex);
 632        return ret;
 633}
 634
 635static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
 636                             struct rdma_conn_param *conn_param)
 637{
 638        struct ib_qp_attr qp_attr;
 639        int qp_attr_mask, ret;
 640
 641        mutex_lock(&id_priv->qp_mutex);
 642        if (!id_priv->id.qp) {
 643                ret = 0;
 644                goto out;
 645        }
 646
 647        qp_attr.qp_state = IB_QPS_RTS;
 648        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 649        if (ret)
 650                goto out;
 651
 652        if (conn_param)
 653                qp_attr.max_rd_atomic = conn_param->initiator_depth;
 654        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 655out:
 656        mutex_unlock(&id_priv->qp_mutex);
 657        return ret;
 658}
 659
 660static int cma_modify_qp_err(struct rdma_id_private *id_priv)
 661{
 662        struct ib_qp_attr qp_attr;
 663        int ret;
 664
 665        mutex_lock(&id_priv->qp_mutex);
 666        if (!id_priv->id.qp) {
 667                ret = 0;
 668                goto out;
 669        }
 670
 671        qp_attr.qp_state = IB_QPS_ERR;
 672        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
 673out:
 674        mutex_unlock(&id_priv->qp_mutex);
 675        return ret;
 676}
 677
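/*
 * Fill the INIT-state attributes for an IB QP before a CM id exists (or
 * for UD QPs): pkey index, port number, and either the resolved QKey (UD)
 * or cleared access flags (connected QPs).
 */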
 678static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
 679                               struct ib_qp_attr *qp_attr, int *qp_attr_mask)
 680{
 681        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 682        int ret;
 683        u16 pkey;
 684
 685        if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) ==
 686            IB_LINK_LAYER_INFINIBAND)
 687                pkey = ib_addr_get_pkey(dev_addr);
 688        else
 689                pkey = 0xffff;
 690
 691        ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
 692                                  pkey, &qp_attr->pkey_index);
 693        if (ret)
 694                return ret;
 695
 696        qp_attr->port_num = id_priv->id.port_num;
 697        *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
 698
 699        if (id_priv->id.qp_type == IB_QPT_UD) {
 700                ret = cma_set_qkey(id_priv, 0);
 701                if (ret)
 702                        return ret;
 703
 704                qp_attr->qkey = id_priv->qkey;
 705                *qp_attr_mask |= IB_QP_QKEY;
 706        } else {
 707                qp_attr->qp_access_flags = 0;
 708                *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
 709        }
 710        return 0;
 711}
 712
 713int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 714                       int *qp_attr_mask)
 715{
 716        struct rdma_id_private *id_priv;
 717        int ret = 0;
 718
 719        id_priv = container_of(id, struct rdma_id_private, id);
 720        switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
 721        case RDMA_TRANSPORT_IB:
 722                if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
 723                        ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
 724                else
 725                        ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
 726                                                 qp_attr_mask);
 727                if (qp_attr->qp_state == IB_QPS_RTR)
 728                        qp_attr->rq_psn = id_priv->seq_num;
 729                break;
 730        case RDMA_TRANSPORT_IWARP:
 731                if (!id_priv->cm_id.iw) {
 732                        qp_attr->qp_access_flags = 0;
 733                        *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
 734                } else
 735                        ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
 736                                                 qp_attr_mask);
 737                break;
 738        default:
 739                ret = -ENOSYS;
 740                break;
 741        }
 742
 743        return ret;
 744}
 745EXPORT_SYMBOL(rdma_init_qp_attr);
 746
 747static inline int cma_zero_addr(struct sockaddr *addr)
 748{
 749        switch (addr->sa_family) {
 750        case AF_INET:
 751                return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
 752        case AF_INET6:
 753                return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
 754        case AF_IB:
 755                return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
 756        default:
 757                return 0;
 758        }
 759}
 760
 761static inline int cma_loopback_addr(struct sockaddr *addr)
 762{
 763        switch (addr->sa_family) {
 764        case AF_INET:
 765                return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
 766        case AF_INET6:
 767                return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
 768        case AF_IB:
 769                return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
 770        default:
 771                return 0;
 772        }
 773}
 774
 775static inline int cma_any_addr(struct sockaddr *addr)
 776{
 777        return cma_zero_addr(addr) || cma_loopback_addr(addr);
 778}
 779
 780static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
 781{
 782        if (src->sa_family != dst->sa_family)
 783                return -1;
 784
 785        switch (src->sa_family) {
 786        case AF_INET:
 787                return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
 788                       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
 789        case AF_INET6:
 790                return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
 791                                     &((struct sockaddr_in6 *) dst)->sin6_addr);
 792        default:
 793                return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
 794                                   &((struct sockaddr_ib *) dst)->sib_addr);
 795        }
 796}
 797
 798static __be16 cma_port(struct sockaddr *addr)
 799{
 800        struct sockaddr_ib *sib;
 801
 802        switch (addr->sa_family) {
 803        case AF_INET:
 804                return ((struct sockaddr_in *) addr)->sin_port;
 805        case AF_INET6:
 806                return ((struct sockaddr_in6 *) addr)->sin6_port;
 807        case AF_IB:
 808                sib = (struct sockaddr_ib *) addr;
 809                return htons((u16) (be64_to_cpu(sib->sib_sid) &
 810                                    be64_to_cpu(sib->sib_sid_mask)));
 811        default:
 812                return 0;
 813        }
 814}
 815
 816static inline int cma_any_port(struct sockaddr *addr)
 817{
 818        return !cma_port(addr);
 819}
 820
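/*
 * cma_save_*_info() extract the source/destination addressing for a new
 * connection request into the child id: for AF_IB listens the addresses
 * come from the primary path record, otherwise they are parsed out of the
 * cma_hdr carried in the request's private data (IPv4 or IPv6).
 */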
 821static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
 822                             struct ib_sa_path_rec *path)
 823{
 824        struct sockaddr_ib *listen_ib, *ib;
 825
 826        listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
 827        ib = (struct sockaddr_ib *) &id->route.addr.src_addr;
 828        ib->sib_family = listen_ib->sib_family;
 829        ib->sib_pkey = path->pkey;
 830        ib->sib_flowinfo = path->flow_label;
 831        memcpy(&ib->sib_addr, &path->sgid, 16);
 832        ib->sib_sid = listen_ib->sib_sid;
 833        ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
 834        ib->sib_scope_id = listen_ib->sib_scope_id;
 835
 836        ib = (struct sockaddr_ib *) &id->route.addr.dst_addr;
 837        ib->sib_family = listen_ib->sib_family;
 838        ib->sib_pkey = path->pkey;
 839        ib->sib_flowinfo = path->flow_label;
 840        memcpy(&ib->sib_addr, &path->dgid, 16);
 841}
 842
 843static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
 844                              struct cma_hdr *hdr)
 845{
 846        struct sockaddr_in *listen4, *ip4;
 847
 848        listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr;
 849        ip4 = (struct sockaddr_in *) &id->route.addr.src_addr;
 850        ip4->sin_family = listen4->sin_family;
 851        ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
 852        ip4->sin_port = listen4->sin_port;
 853
 854        ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr;
 855        ip4->sin_family = listen4->sin_family;
 856        ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
 857        ip4->sin_port = hdr->port;
 858}
 859
 860static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
 861                              struct cma_hdr *hdr)
 862{
 863        struct sockaddr_in6 *listen6, *ip6;
 864
 865        listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr;
 866        ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr;
 867        ip6->sin6_family = listen6->sin6_family;
 868        ip6->sin6_addr = hdr->dst_addr.ip6;
 869        ip6->sin6_port = listen6->sin6_port;
 870
 871        ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr;
 872        ip6->sin6_family = listen6->sin6_family;
 873        ip6->sin6_addr = hdr->src_addr.ip6;
 874        ip6->sin6_port = hdr->port;
 875}
 876
 877static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
 878                             struct ib_cm_event *ib_event)
 879{
 880        struct cma_hdr *hdr;
 881
 882        if ((listen_id->route.addr.src_addr.ss_family == AF_IB) &&
 883            (ib_event->event == IB_CM_REQ_RECEIVED)) {
 884                cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path);
 885                return 0;
 886        }
 887
 888        hdr = ib_event->private_data;
 889        if (hdr->cma_version != CMA_VERSION)
 890                return -EINVAL;
 891
 892        switch (cma_get_ip_ver(hdr)) {
 893        case 4:
 894                cma_save_ip4_info(id, listen_id, hdr);
 895                break;
 896        case 6:
 897                cma_save_ip6_info(id, listen_id, hdr);
 898                break;
 899        default:
 900                return -EINVAL;
 901        }
 902        return 0;
 903}
 904
 905static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
 906{
 907        return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
 908}
 909
 910static void cma_cancel_route(struct rdma_id_private *id_priv)
 911{
 912        switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) {
 913        case IB_LINK_LAYER_INFINIBAND:
 914                if (id_priv->query)
 915                        ib_sa_cancel_query(id_priv->query_id, id_priv->query);
 916                break;
 917        default:
 918                break;
 919        }
 920}
 921
 922static void cma_cancel_listens(struct rdma_id_private *id_priv)
 923{
 924        struct rdma_id_private *dev_id_priv;
 925
 926        /*
 927         * Remove from listen_any_list to prevent added devices from spawning
 928         * additional listen requests.
 929         */
 930        mutex_lock(&lock);
 931        list_del(&id_priv->list);
 932
 933        while (!list_empty(&id_priv->listen_list)) {
 934                dev_id_priv = list_entry(id_priv->listen_list.next,
 935                                         struct rdma_id_private, listen_list);
 936                /* sync with device removal to avoid duplicate destruction */
 937                list_del_init(&dev_id_priv->list);
 938                list_del(&dev_id_priv->listen_list);
 939                mutex_unlock(&lock);
 940
 941                rdma_destroy_id(&dev_id_priv->id);
 942                mutex_lock(&lock);
 943        }
 944        mutex_unlock(&lock);
 945}
 946
 947static void cma_cancel_operation(struct rdma_id_private *id_priv,
 948                                 enum rdma_cm_state state)
 949{
 950        switch (state) {
 951        case RDMA_CM_ADDR_QUERY:
 952                rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
 953                break;
 954        case RDMA_CM_ROUTE_QUERY:
 955                cma_cancel_route(id_priv);
 956                break;
 957        case RDMA_CM_LISTEN:
 958                if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
 959                        cma_cancel_listens(id_priv);
 960                break;
 961        default:
 962                break;
 963        }
 964}
 965
 966static void cma_release_port(struct rdma_id_private *id_priv)
 967{
 968        struct rdma_bind_list *bind_list = id_priv->bind_list;
 969
 970        if (!bind_list)
 971                return;
 972
 973        mutex_lock(&lock);
 974        hlist_del(&id_priv->node);
 975        if (hlist_empty(&bind_list->owners)) {
 976                idr_remove(bind_list->ps, bind_list->port);
 977                kfree(bind_list);
 978        }
 979        mutex_unlock(&lock);
 980}
 981
 982static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
 983{
 984        struct cma_multicast *mc;
 985
 986        while (!list_empty(&id_priv->mc_list)) {
 987                mc = container_of(id_priv->mc_list.next,
 988                                  struct cma_multicast, list);
 989                list_del(&mc->list);
 990                switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) {
 991                case IB_LINK_LAYER_INFINIBAND:
 992                        ib_sa_free_multicast(mc->multicast.ib);
 993                        kfree(mc);
 994                        break;
 995                case IB_LINK_LAYER_ETHERNET:
 996                        kref_put(&mc->mcref, release_mc);
 997                        break;
 998                default:
 999                        break;
1000                }
1001        }
1002}
1003
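/*
 * Tear down an id: move it to RDMA_CM_DESTROYING, cancel any outstanding
 * address/route query or listens, wait for a running event handler to
 * drain, destroy the underlying IB/iWARP CM id, leave multicast groups,
 * release the device and port, and finally free the id once the last
 * reference is dropped.
 */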
1004void rdma_destroy_id(struct rdma_cm_id *id)
1005{
1006        struct rdma_id_private *id_priv;
1007        enum rdma_cm_state state;
1008
1009        id_priv = container_of(id, struct rdma_id_private, id);
1010        state = cma_exch(id_priv, RDMA_CM_DESTROYING);
1011        cma_cancel_operation(id_priv, state);
1012
1013        /*
1014         * Wait for any active callback to finish.  New callbacks will find
1015         * the id_priv state set to destroying and abort.
1016         */
1017        mutex_lock(&id_priv->handler_mutex);
1018        mutex_unlock(&id_priv->handler_mutex);
1019
1020        if (id_priv->cma_dev) {
1021                switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
1022                case RDMA_TRANSPORT_IB:
1023                        if (id_priv->cm_id.ib)
1024                                ib_destroy_cm_id(id_priv->cm_id.ib);
1025                        break;
1026                case RDMA_TRANSPORT_IWARP:
1027                        if (id_priv->cm_id.iw)
1028                                iw_destroy_cm_id(id_priv->cm_id.iw);
1029                        break;
1030                default:
1031                        break;
1032                }
1033                cma_leave_mc_groups(id_priv);
1034                cma_release_dev(id_priv);
1035        }
1036
1037        cma_release_port(id_priv);
1038        cma_deref_id(id_priv);
1039        wait_for_completion(&id_priv->comp);
1040
1041        if (id_priv->internal_id)
1042                cma_deref_id(id_priv->id.context);
1043
1044        kfree(id_priv->id.route.path_rec);
1045        kfree(id_priv);
1046}
1047EXPORT_SYMBOL(rdma_destroy_id);
1048
1049static int cma_rep_recv(struct rdma_id_private *id_priv)
1050{
1051        int ret;
1052
1053        ret = cma_modify_qp_rtr(id_priv, NULL);
1054        if (ret)
1055                goto reject;
1056
1057        ret = cma_modify_qp_rts(id_priv, NULL);
1058        if (ret)
1059                goto reject;
1060
1061        ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
1062        if (ret)
1063                goto reject;
1064
1065        return 0;
1066reject:
1067        cma_modify_qp_err(id_priv);
1068        ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
1069                       NULL, 0, NULL, 0);
1070        return ret;
1071}
1072
1073static void cma_set_rep_event_data(struct rdma_cm_event *event,
1074                                   struct ib_cm_rep_event_param *rep_data,
1075                                   void *private_data)
1076{
1077        event->param.conn.private_data = private_data;
1078        event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
1079        event->param.conn.responder_resources = rep_data->responder_resources;
1080        event->param.conn.initiator_depth = rep_data->initiator_depth;
1081        event->param.conn.flow_control = rep_data->flow_control;
1082        event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
1083        event->param.conn.srq = rep_data->srq;
1084        event->param.conn.qp_num = rep_data->remote_qpn;
1085}
1086
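/*
 * IB CM callback for connected ids.  Translates ib_cm events into rdma_cm
 * events, transitions the QP as needed (e.g. towards RTS on REP, to error
 * on REJ), and forwards the result to the user's event handler.  A
 * non-zero return from that handler destroys the id here.
 */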
1087static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1088{
1089        struct rdma_id_private *id_priv = cm_id->context;
1090        struct rdma_cm_event event;
1091        int ret = 0;
1092
1093        if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
1094                cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
1095            (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
1096                cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
1097                return 0;
1098
1099        memset(&event, 0, sizeof event);
1100        switch (ib_event->event) {
1101        case IB_CM_REQ_ERROR:
1102        case IB_CM_REP_ERROR:
1103                event.event = RDMA_CM_EVENT_UNREACHABLE;
1104                event.status = -ETIMEDOUT;
1105                break;
1106        case IB_CM_REP_RECEIVED:
1107                if (id_priv->id.qp) {
1108                        event.status = cma_rep_recv(id_priv);
1109                        event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
1110                                                     RDMA_CM_EVENT_ESTABLISHED;
1111                } else {
1112                        event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
1113                }
1114                cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
1115                                       ib_event->private_data);
1116                break;
1117        case IB_CM_RTU_RECEIVED:
1118        case IB_CM_USER_ESTABLISHED:
1119                event.event = RDMA_CM_EVENT_ESTABLISHED;
1120                break;
1121        case IB_CM_DREQ_ERROR:
1122                event.status = -ETIMEDOUT; /* fall through */
1123        case IB_CM_DREQ_RECEIVED:
1124        case IB_CM_DREP_RECEIVED:
1125                if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
1126                                   RDMA_CM_DISCONNECT))
1127                        goto out;
1128                event.event = RDMA_CM_EVENT_DISCONNECTED;
1129                break;
1130        case IB_CM_TIMEWAIT_EXIT:
1131                event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
1132                break;
1133        case IB_CM_MRA_RECEIVED:
1134                /* ignore event */
1135                goto out;
1136        case IB_CM_REJ_RECEIVED:
1137                cma_modify_qp_err(id_priv);
1138                event.status = ib_event->param.rej_rcvd.reason;
1139                event.event = RDMA_CM_EVENT_REJECTED;
1140                event.param.conn.private_data = ib_event->private_data;
1141                event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
1142                break;
1143        default:
1144                printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
1145                       ib_event->event);
1146                goto out;
1147        }
1148
1149        ret = id_priv->id.event_handler(&id_priv->id, &event);
1150        if (ret) {
1151                /* Destroy the CM ID by returning a non-zero value. */
1152                id_priv->cm_id.ib = NULL;
1153                cma_exch(id_priv, RDMA_CM_DESTROYING);
1154                mutex_unlock(&id_priv->handler_mutex);
1155                rdma_destroy_id(&id_priv->id);
1156                return ret;
1157        }
1158out:
1159        mutex_unlock(&id_priv->handler_mutex);
1160        return ret;
1161}
1162
1163static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1164                                               struct ib_cm_event *ib_event)
1165{
1166        struct rdma_id_private *id_priv;
1167        struct rdma_cm_id *id;
1168        struct rdma_route *rt;
1169        int ret;
1170
1171        id = rdma_create_id(listen_id->event_handler, listen_id->context,
1172                            listen_id->ps, ib_event->param.req_rcvd.qp_type);
1173        if (IS_ERR(id))
1174                return NULL;
1175
1176        id_priv = container_of(id, struct rdma_id_private, id);
1177        if (cma_save_net_info(id, listen_id, ib_event))
1178                goto err;
1179
1180        rt = &id->route;
1181        rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
1182        rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
1183                               GFP_KERNEL);
1184        if (!rt->path_rec)
1185                goto err;
1186
1187        rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
1188        if (rt->num_paths == 2)
1189                rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1190
1191        if (cma_any_addr(cma_src_addr(id_priv))) {
1192                rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
1193                rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1194                ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
1195        } else {
1196                ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
1197                if (ret)
1198                        goto err;
1199        }
1200        rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1201
1202        id_priv->state = RDMA_CM_CONNECT;
1203        return id_priv;
1204
1205err:
1206        rdma_destroy_id(id);
1207        return NULL;
1208}
1209
1210static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1211                                              struct ib_cm_event *ib_event)
1212{
1213        struct rdma_id_private *id_priv;
1214        struct rdma_cm_id *id;
1215        int ret;
1216
1217        id = rdma_create_id(listen_id->event_handler, listen_id->context,
1218                            listen_id->ps, IB_QPT_UD);
1219        if (IS_ERR(id))
1220                return NULL;
1221
1222        id_priv = container_of(id, struct rdma_id_private, id);
1223        if (cma_save_net_info(id, listen_id, ib_event))
1224                goto err;
1225
1226        if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
1227                ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr);
1228                if (ret)
1229                        goto err;
1230        }
1231
1232        id_priv->state = RDMA_CM_CONNECT;
1233        return id_priv;
1234err:
1235        rdma_destroy_id(id);
1236        return NULL;
1237}
1238
1239static void cma_set_req_event_data(struct rdma_cm_event *event,
1240                                   struct ib_cm_req_event_param *req_data,
1241                                   void *private_data, int offset)
1242{
1243        event->param.conn.private_data = private_data + offset;
1244        event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1245        event->param.conn.responder_resources = req_data->responder_resources;
1246        event->param.conn.initiator_depth = req_data->initiator_depth;
1247        event->param.conn.flow_control = req_data->flow_control;
1248        event->param.conn.retry_count = req_data->retry_count;
1249        event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1250        event->param.conn.srq = req_data->srq;
1251        event->param.conn.qp_num = req_data->remote_qpn;
1252}
1253
1254static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
1255{
1256        return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
1257                 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
1258                ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
1259                 (id->qp_type == IB_QPT_UD)) ||
1260                (!id->qp_type));
1261}
1262
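/*
 * IB CM callback for listening ids.  Builds a child id for the incoming
 * REQ or SIDR_REQ (cma_new_conn_id()/cma_new_udp_id()), binds it to a
 * device, takes over the new ib_cm id, and reports
 * RDMA_CM_EVENT_CONNECT_REQUEST to the listener.  An MRA is sent for
 * connected QP types so the peer does not time out while the user decides
 * whether to accept.
 */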
1263static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1264{
1265        struct rdma_id_private *listen_id, *conn_id;
1266        struct rdma_cm_event event;
1267        int offset, ret;
1268
1269        listen_id = cm_id->context;
1270        if (!cma_check_req_qp_type(&listen_id->id, ib_event))
1271                return -EINVAL;
1272
1273        if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
1274                return -ECONNABORTED;
1275
1276        memset(&event, 0, sizeof event);
1277        offset = cma_user_data_offset(listen_id);
1278        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1279        if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
1280                conn_id = cma_new_udp_id(&listen_id->id, ib_event);
1281                event.param.ud.private_data = ib_event->private_data + offset;
1282                event.param.ud.private_data_len =
1283                                IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1284        } else {
1285                conn_id = cma_new_conn_id(&listen_id->id, ib_event);
1286                cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1287                                       ib_event->private_data, offset);
1288        }
1289        if (!conn_id) {
1290                ret = -ENOMEM;
1291                goto err1;
1292        }
1293
1294        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1295        ret = cma_acquire_dev(conn_id);
1296        if (ret)
1297                goto err2;
1298
1299        conn_id->cm_id.ib = cm_id;
1300        cm_id->context = conn_id;
1301        cm_id->cm_handler = cma_ib_handler;
1302
1303        /*
1304         * Protect against the user destroying conn_id from another thread
1305         * until we're done accessing it.
1306         */
1307        atomic_inc(&conn_id->refcount);
1308        ret = conn_id->id.event_handler(&conn_id->id, &event);
1309        if (ret)
1310                goto err3;
1311
1312        /*
1313         * Acquire mutex to prevent user executing rdma_destroy_id()
1314         * while we're accessing the cm_id.
1315         */
1316        mutex_lock(&lock);
1317        if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
1318                ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1319        mutex_unlock(&lock);
1320        mutex_unlock(&conn_id->handler_mutex);
1321        mutex_unlock(&listen_id->handler_mutex);
1322        cma_deref_id(conn_id);
1323        return 0;
1324
1325err3:
1326        cma_deref_id(conn_id);
1327        /* Destroy the CM ID by returning a non-zero value. */
1328        conn_id->cm_id.ib = NULL;
1329err2:
1330        cma_exch(conn_id, RDMA_CM_DESTROYING);
1331        mutex_unlock(&conn_id->handler_mutex);
1332err1:
1333        mutex_unlock(&listen_id->handler_mutex);
1334        if (conn_id)
1335                rdma_destroy_id(&conn_id->id);
1336        return ret;
1337}
1338
1339__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
1340{
1341        if (addr->sa_family == AF_IB)
1342                return ((struct sockaddr_ib *) addr)->sib_sid;
1343
1344        return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
1345}
1346EXPORT_SYMBOL(rdma_get_service_id);
1347
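/*
 * Build the private-data compare/mask pair handed to ib_cm_listen() so the
 * IB CM only delivers requests whose cma_hdr IP version and destination
 * address match this listen.  Wildcard listens leave the address bytes
 * unmasked.
 */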
1348static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1349                                 struct ib_cm_compare_data *compare)
1350{
1351        struct cma_hdr *cma_data, *cma_mask;
1352        __be32 ip4_addr;
1353        struct in6_addr ip6_addr;
1354
1355        memset(compare, 0, sizeof *compare);
1356        cma_data = (void *) compare->data;
1357        cma_mask = (void *) compare->mask;
1358
1359        switch (addr->sa_family) {
1360        case AF_INET:
1361                ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1362                cma_set_ip_ver(cma_data, 4);
1363                cma_set_ip_ver(cma_mask, 0xF);
1364                if (!cma_any_addr(addr)) {
1365                        cma_data->dst_addr.ip4.addr = ip4_addr;
1366                        cma_mask->dst_addr.ip4.addr = htonl(~0);
1367                }
1368                break;
1369        case AF_INET6:
1370                ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1371                cma_set_ip_ver(cma_data, 6);
1372                cma_set_ip_ver(cma_mask, 0xF);
1373                if (!cma_any_addr(addr)) {
1374                        cma_data->dst_addr.ip6 = ip6_addr;
1375                        memset(&cma_mask->dst_addr.ip6, 0xFF,
1376                               sizeof cma_mask->dst_addr.ip6);
1377                }
1378                break;
1379        default:
1380                break;
1381        }
1382}
1383
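/*
 * iWARP CM callback for connected ids.  Maps iw_cm events and their status
 * codes onto rdma_cm events (ESTABLISHED, REJECTED, UNREACHABLE,
 * DISCONNECTED, CONNECT_ERROR) and forwards them to the user's handler.
 */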
1384static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1385{
1386        struct rdma_id_private *id_priv = iw_id->context;
1387        struct rdma_cm_event event;
1388        struct sockaddr_in *sin;
1389        int ret = 0;
1390
1391        if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
1392                return 0;
1393
1394        memset(&event, 0, sizeof event);
1395        switch (iw_event->event) {
1396        case IW_CM_EVENT_CLOSE:
1397                event.event = RDMA_CM_EVENT_DISCONNECTED;
1398                break;
1399        case IW_CM_EVENT_CONNECT_REPLY:
1400                sin = (struct sockaddr_in *) cma_src_addr(id_priv);
1401                *sin = iw_event->local_addr;
1402                sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
1403                *sin = iw_event->remote_addr;
1404                switch (iw_event->status) {
1405                case 0:
1406                        event.event = RDMA_CM_EVENT_ESTABLISHED;
1407                        event.param.conn.initiator_depth = iw_event->ird;
1408                        event.param.conn.responder_resources = iw_event->ord;
1409                        break;
1410                case -ECONNRESET:
1411                case -ECONNREFUSED:
1412                        event.event = RDMA_CM_EVENT_REJECTED;
1413                        break;
1414                case -ETIMEDOUT:
1415                        event.event = RDMA_CM_EVENT_UNREACHABLE;
1416                        break;
1417                default:
1418                        event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1419                        break;
1420                }
1421                break;
1422        case IW_CM_EVENT_ESTABLISHED:
1423                event.event = RDMA_CM_EVENT_ESTABLISHED;
1424                event.param.conn.initiator_depth = iw_event->ird;
1425                event.param.conn.responder_resources = iw_event->ord;
1426                break;
1427        default:
1428                BUG_ON(1);
1429        }
1430
1431        event.status = iw_event->status;
1432        event.param.conn.private_data = iw_event->private_data;
1433        event.param.conn.private_data_len = iw_event->private_data_len;
1434        ret = id_priv->id.event_handler(&id_priv->id, &event);
1435        if (ret) {
1436                /* Destroy the CM ID by returning a non-zero value. */
1437                id_priv->cm_id.iw = NULL;
1438                cma_exch(id_priv, RDMA_CM_DESTROYING);
1439                mutex_unlock(&id_priv->handler_mutex);
1440                rdma_destroy_id(&id_priv->id);
1441                return ret;
1442        }
1443
1444        mutex_unlock(&id_priv->handler_mutex);
1445        return ret;
1446}
1447
1448static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1449                               struct iw_cm_event *iw_event)
1450{
1451        struct rdma_cm_id *new_cm_id;
1452        struct rdma_id_private *listen_id, *conn_id;
1453        struct sockaddr_in *sin;
1454        struct net_device *dev = NULL;
1455        struct rdma_cm_event event;
1456        int ret;
1457        struct ib_device_attr attr;
1458
1459        listen_id = cm_id->context;
1460        if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
1461                return -ECONNABORTED;
1462
1463        /* Create a new RDMA id for the new IW CM ID */
1464        new_cm_id = rdma_create_id(listen_id->id.event_handler,
1465                                   listen_id->id.context,
1466                                   RDMA_PS_TCP, IB_QPT_RC);
1467        if (IS_ERR(new_cm_id)) {
1468                ret = -ENOMEM;
1469                goto out;
1470        }
1471        conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1472        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1473        conn_id->state = RDMA_CM_CONNECT;
1474
1475        dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
1476        if (!dev) {
1477                ret = -EADDRNOTAVAIL;
1478                mutex_unlock(&conn_id->handler_mutex);
1479                rdma_destroy_id(new_cm_id);
1480                goto out;
1481        }
1482        ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1483        if (ret) {
1484                mutex_unlock(&conn_id->handler_mutex);
1485                rdma_destroy_id(new_cm_id);
1486                goto out;
1487        }
1488
1489        ret = cma_acquire_dev(conn_id);
1490        if (ret) {
1491                mutex_unlock(&conn_id->handler_mutex);
1492                rdma_destroy_id(new_cm_id);
1493                goto out;
1494        }
1495
1496        conn_id->cm_id.iw = cm_id;
1497        cm_id->context = conn_id;
1498        cm_id->cm_handler = cma_iw_handler;
1499
1500        sin = (struct sockaddr_in *) cma_src_addr(conn_id);
1501        *sin = iw_event->local_addr;
1502        sin = (struct sockaddr_in *) cma_dst_addr(conn_id);
1503        *sin = iw_event->remote_addr;
1504
1505        ret = ib_query_device(conn_id->id.device, &attr);
1506        if (ret) {
1507                mutex_unlock(&conn_id->handler_mutex);
1508                rdma_destroy_id(new_cm_id);
1509                goto out;
1510        }
1511
1512        memset(&event, 0, sizeof event);
1513        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1514        event.param.conn.private_data = iw_event->private_data;
1515        event.param.conn.private_data_len = iw_event->private_data_len;
1516        event.param.conn.initiator_depth = iw_event->ird;
1517        event.param.conn.responder_resources = iw_event->ord;
1518
1519        /*
1520         * Protect against the user destroying conn_id from another thread
1521         * until we're done accessing it.
1522         */
1523        atomic_inc(&conn_id->refcount);
1524        ret = conn_id->id.event_handler(&conn_id->id, &event);
1525        if (ret) {
1526                /* User wants to destroy the CM ID */
1527                conn_id->cm_id.iw = NULL;
1528                cma_exch(conn_id, RDMA_CM_DESTROYING);
1529                mutex_unlock(&conn_id->handler_mutex);
1530                cma_deref_id(conn_id);
1531                rdma_destroy_id(&conn_id->id);
1532                goto out;
1533        }
1534
1535        mutex_unlock(&conn_id->handler_mutex);
1536        cma_deref_id(conn_id);
1537
1538out:
1539        if (dev)
1540                dev_put(dev);
1541        mutex_unlock(&listen_id->handler_mutex);
1542        return ret;
1543}
1544
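/*
 * Start an IB CM listen for this id.  A wildcard address (without an
 * AF-only binding) listens on the service ID alone; otherwise a
 * private-data compare mask restricts the listen to the bound address.
 */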
1545static int cma_ib_listen(struct rdma_id_private *id_priv)
1546{
1547        struct ib_cm_compare_data compare_data;
1548        struct sockaddr *addr;
1549        struct ib_cm_id *id;
1550        __be64 svc_id;
1551        int ret;
1552
1553        id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv);
1554        if (IS_ERR(id))
1555                return PTR_ERR(id);
1556
1557        id_priv->cm_id.ib = id;
1558
1559        addr = cma_src_addr(id_priv);
1560        svc_id = rdma_get_service_id(&id_priv->id, addr);
1561        if (cma_any_addr(addr) && !id_priv->afonly) {
1562                ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1563        } else {
1564                cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1565                ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1566        }
1567
1568        if (ret) {
1569                ib_destroy_cm_id(id_priv->cm_id.ib);
1570                id_priv->cm_id.ib = NULL;
1571        }
1572
1573        return ret;
1574}
1575
1576static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1577{
1578        int ret;
1579        struct sockaddr_in *sin;
1580        struct iw_cm_id *id;
1581
1582        id = iw_create_cm_id(id_priv->id.device,
1583                             iw_conn_req_handler,
1584                             id_priv);
1585        if (IS_ERR(id))
1586                return PTR_ERR(id);
1587
1588        id_priv->cm_id.iw = id;
1589
1590        sin = (struct sockaddr_in *) cma_src_addr(id_priv);
1591        id_priv->cm_id.iw->local_addr = *sin;
1592
1593        ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1594
1595        if (ret) {
1596                iw_destroy_cm_id(id_priv->cm_id.iw);
1597                id_priv->cm_id.iw = NULL;
1598        }
1599
1600        return ret;
1601}
1602
1603static int cma_listen_handler(struct rdma_cm_id *id,
1604                              struct rdma_cm_event *event)
1605{
1606        struct rdma_id_private *id_priv = id->context;
1607
1608        id->context = id_priv->id.context;
1609        id->event_handler = id_priv->id.event_handler;
1610        return id_priv->id.event_handler(id, event);
1611}
1612
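/*
 * When listening without a bound device, the user's id is cloned into an
 * internal per-device id, attached to the given cma_device and listened
 * on; events on the clone are funneled back to the original id through
 * cma_listen_handler() above.
 */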
1613static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1614                              struct cma_device *cma_dev)
1615{
1616        struct rdma_id_private *dev_id_priv;
1617        struct rdma_cm_id *id;
1618        int ret;
1619
1620        if (cma_family(id_priv) == AF_IB &&
1621            rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB)
1622                return;
1623
1624        id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
1625                            id_priv->id.qp_type);
1626        if (IS_ERR(id))
1627                return;
1628
1629        dev_id_priv = container_of(id, struct rdma_id_private, id);
1630
1631        dev_id_priv->state = RDMA_CM_ADDR_BOUND;
1632        memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
1633               rdma_addr_size(cma_src_addr(id_priv)));
1634
1635        cma_attach_to_dev(dev_id_priv, cma_dev);
1636        list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1637        atomic_inc(&id_priv->refcount);
1638        dev_id_priv->internal_id = 1;
1639        dev_id_priv->afonly = id_priv->afonly;
1640
1641        ret = rdma_listen(id, id_priv->backlog);
1642        if (ret)
1643                printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
1644                       "listening on device %s\n", ret, cma_dev->device->name);
1645}
1646
1647static void cma_listen_on_all(struct rdma_id_private *id_priv)
1648{
1649        struct cma_device *cma_dev;
1650
1651        mutex_lock(&lock);
1652        list_add_tail(&id_priv->list, &listen_any_list);
1653        list_for_each_entry(cma_dev, &dev_list, list)
1654                cma_listen_on_dev(id_priv, cma_dev);
1655        mutex_unlock(&lock);
1656}
1657
1658void rdma_set_service_type(struct rdma_cm_id *id, int tos)
1659{
1660        struct rdma_id_private *id_priv;
1661
1662        id_priv = container_of(id, struct rdma_id_private, id);
1663        id_priv->tos = (u8) tos;
1664}
1665EXPORT_SYMBOL(rdma_set_service_type);
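/*
 * The stored type-of-service value is only consumed while the route is
 * resolved (see cma_query_ib_route() and cma_resolve_iboe_route()), so it
 * should be set before calling rdma_resolve_route().
 */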
1666
1667static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1668                              void *context)
1669{
1670        struct cma_work *work = context;
1671        struct rdma_route *route;
1672
1673        route = &work->id->id.route;
1674
1675        if (!status) {
1676                route->num_paths = 1;
1677                *route->path_rec = *path_rec;
1678        } else {
1679                work->old_state = RDMA_CM_ROUTE_QUERY;
1680                work->new_state = RDMA_CM_ADDR_RESOLVED;
1681                work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1682                work->event.status = status;
1683        }
1684
1685        queue_work(cma_wq, &work->work);
1686}
1687
1688static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1689                              struct cma_work *work)
1690{
1691        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
1692        struct ib_sa_path_rec path_rec;
1693        ib_sa_comp_mask comp_mask;
1694        struct sockaddr_in6 *sin6;
1695        struct sockaddr_ib *sib;
1696
1697        memset(&path_rec, 0, sizeof path_rec);
1698        rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
1699        rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
1700        path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
1701        path_rec.numb_path = 1;
1702        path_rec.reversible = 1;
1703        path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
1704
1705        comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1706                    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1707                    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1708
1709        switch (cma_family(id_priv)) {
1710        case AF_INET:
1711                path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1712                comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1713                break;
1714        case AF_INET6:
1715                sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
1716                path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1717                comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1718                break;
1719        case AF_IB:
1720                sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
1721                path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
1722                comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1723                break;
1724        }
1725
1726        id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1727                                               id_priv->id.port_num, &path_rec,
1728                                               comp_mask, timeout_ms,
1729                                               GFP_KERNEL, cma_query_handler,
1730                                               work, &id_priv->query);
1731
1732        return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1733}
1734
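/*
 * Deferred work items carry a state transition plus the event to report.
 * The event is delivered only if the id is still in work->old_state, and
 * a non-zero return from the user's handler tears the id down.
 */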
1735static void cma_work_handler(struct work_struct *_work)
1736{
1737        struct cma_work *work = container_of(_work, struct cma_work, work);
1738        struct rdma_id_private *id_priv = work->id;
1739        int destroy = 0;
1740
1741        mutex_lock(&id_priv->handler_mutex);
1742        if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1743                goto out;
1744
1745        if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1746                cma_exch(id_priv, RDMA_CM_DESTROYING);
1747                destroy = 1;
1748        }
1749out:
1750        mutex_unlock(&id_priv->handler_mutex);
1751        cma_deref_id(id_priv);
1752        if (destroy)
1753                rdma_destroy_id(&id_priv->id);
1754        kfree(work);
1755}
1756
1757static void cma_ndev_work_handler(struct work_struct *_work)
1758{
1759        struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
1760        struct rdma_id_private *id_priv = work->id;
1761        int destroy = 0;
1762
1763        mutex_lock(&id_priv->handler_mutex);
1764        if (id_priv->state == RDMA_CM_DESTROYING ||
1765            id_priv->state == RDMA_CM_DEVICE_REMOVAL)
1766                goto out;
1767
1768        if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1769                cma_exch(id_priv, RDMA_CM_DESTROYING);
1770                destroy = 1;
1771        }
1772
1773out:
1774        mutex_unlock(&id_priv->handler_mutex);
1775        cma_deref_id(id_priv);
1776        if (destroy)
1777                rdma_destroy_id(&id_priv->id);
1778        kfree(work);
1779}
1780
1781static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1782{
1783        struct rdma_route *route = &id_priv->id.route;
1784        struct cma_work *work;
1785        int ret;
1786
1787        work = kzalloc(sizeof *work, GFP_KERNEL);
1788        if (!work)
1789                return -ENOMEM;
1790
1791        work->id = id_priv;
1792        INIT_WORK(&work->work, cma_work_handler);
1793        work->old_state = RDMA_CM_ROUTE_QUERY;
1794        work->new_state = RDMA_CM_ROUTE_RESOLVED;
1795        work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1796
1797        route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1798        if (!route->path_rec) {
1799                ret = -ENOMEM;
1800                goto err1;
1801        }
1802
1803        ret = cma_query_ib_route(id_priv, timeout_ms, work);
1804        if (ret)
1805                goto err2;
1806
1807        return 0;
1808err2:
1809        kfree(route->path_rec);
1810        route->path_rec = NULL;
1811err1:
1812        kfree(work);
1813        return ret;
1814}
1815
1816int rdma_set_ib_paths(struct rdma_cm_id *id,
1817                      struct ib_sa_path_rec *path_rec, int num_paths)
1818{
1819        struct rdma_id_private *id_priv;
1820        int ret;
1821
1822        id_priv = container_of(id, struct rdma_id_private, id);
1823        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
1824                           RDMA_CM_ROUTE_RESOLVED))
1825                return -EINVAL;
1826
1827        id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
1828                                     GFP_KERNEL);
1829        if (!id->route.path_rec) {
1830                ret = -ENOMEM;
1831                goto err;
1832        }
1833
1834        id->route.num_paths = num_paths;
1835        return 0;
1836err:
1837        cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
1838        return ret;
1839}
1840EXPORT_SYMBOL(rdma_set_ib_paths);
1841
1842static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1843{
1844        struct cma_work *work;
1845
1846        work = kzalloc(sizeof *work, GFP_KERNEL);
1847        if (!work)
1848                return -ENOMEM;
1849
1850        work->id = id_priv;
1851        INIT_WORK(&work->work, cma_work_handler);
1852        work->old_state = RDMA_CM_ROUTE_QUERY;
1853        work->new_state = RDMA_CM_ROUTE_RESOLVED;
1854        work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1855        queue_work(cma_wq, &work->work);
1856        return 0;
1857}
1858
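/*
 * RoCE (IBoE) route resolution issues no SA query.  The path record is
 * synthesized locally: GIDs are derived from the MAC/VLAN addresses, MTU
 * and rate are taken from the net_device, and the SL comes from the ToS
 * via the device's priority-to-traffic-class mapping.
 */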
1859static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
1860{
1861        struct rdma_route *route = &id_priv->id.route;
1862        struct rdma_addr *addr = &route->addr;
1863        struct cma_work *work;
1864        int ret;
1865        struct net_device *ndev = NULL;
1866        u16 vid;
1867
1868        work = kzalloc(sizeof *work, GFP_KERNEL);
1869        if (!work)
1870                return -ENOMEM;
1871
1872        work->id = id_priv;
1873        INIT_WORK(&work->work, cma_work_handler);
1874
1875        route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
1876        if (!route->path_rec) {
1877                ret = -ENOMEM;
1878                goto err1;
1879        }
1880
1881        route->num_paths = 1;
1882
1883        if (addr->dev_addr.bound_dev_if)
1884                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
1885        if (!ndev) {
1886                ret = -ENODEV;
1887                goto err2;
1888        }
1889
1890        vid = rdma_vlan_dev_vlan_id(ndev);
1891
1892        iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
1893        iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
1894
1895        route->path_rec->hop_limit = 1;
1896        route->path_rec->reversible = 1;
1897        route->path_rec->pkey = cpu_to_be16(0xffff);
1898        route->path_rec->mtu_selector = IB_SA_EQ;
1899        route->path_rec->sl = netdev_get_prio_tc_map(
1900                        ndev->priv_flags & IFF_802_1Q_VLAN ?
1901                                vlan_dev_real_dev(ndev) : ndev,
1902                        rt_tos2priority(id_priv->tos));
1903
1904        route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
1905        route->path_rec->rate_selector = IB_SA_EQ;
1906        route->path_rec->rate = iboe_get_rate(ndev);
1907        dev_put(ndev);
1908        route->path_rec->packet_life_time_selector = IB_SA_EQ;
1909        route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
1910        if (!route->path_rec->mtu) {
1911                ret = -EINVAL;
1912                goto err2;
1913        }
1914
1915        work->old_state = RDMA_CM_ROUTE_QUERY;
1916        work->new_state = RDMA_CM_ROUTE_RESOLVED;
1917        work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1918        work->event.status = 0;
1919
1920        queue_work(cma_wq, &work->work);
1921
1922        return 0;
1923
1924err2:
1925        kfree(route->path_rec);
1926        route->path_rec = NULL;
1927err1:
1928        kfree(work);
1929        return ret;
1930}
1931
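/*
 * Route resolution dispatches on the transport: IB ports query the SA for
 * a path record, RoCE ports build the path locally, and iWARP needs no
 * path, so a completion is simply queued.  The outcome is reported
 * asynchronously as RDMA_CM_EVENT_ROUTE_RESOLVED or ROUTE_ERROR.
 */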
1932int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1933{
1934        struct rdma_id_private *id_priv;
1935        int ret;
1936
1937        id_priv = container_of(id, struct rdma_id_private, id);
1938        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
1939                return -EINVAL;
1940
1941        atomic_inc(&id_priv->refcount);
1942        switch (rdma_node_get_transport(id->device->node_type)) {
1943        case RDMA_TRANSPORT_IB:
1944                switch (rdma_port_get_link_layer(id->device, id->port_num)) {
1945                case IB_LINK_LAYER_INFINIBAND:
1946                        ret = cma_resolve_ib_route(id_priv, timeout_ms);
1947                        break;
1948                case IB_LINK_LAYER_ETHERNET:
1949                        ret = cma_resolve_iboe_route(id_priv);
1950                        break;
1951                default:
1952                        ret = -ENOSYS;
1953                }
1954                break;
1955        case RDMA_TRANSPORT_IWARP:
1956                ret = cma_resolve_iw_route(id_priv, timeout_ms);
1957                break;
1958        default:
1959                ret = -ENOSYS;
1960                break;
1961        }
1962        if (ret)
1963                goto err;
1964
1965        return 0;
1966err:
1967        cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
1968        cma_deref_id(id_priv);
1969        return ret;
1970}
1971EXPORT_SYMBOL(rdma_resolve_route);
1972
1973static void cma_set_loopback(struct sockaddr *addr)
1974{
1975        switch (addr->sa_family) {
1976        case AF_INET:
1977                ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1978                break;
1979        case AF_INET6:
1980                ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
1981                              0, 0, 0, htonl(1));
1982                break;
1983        default:
1984                ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
1985                            0, 0, 0, htonl(1));
1986                break;
1987        }
1988}
1989
1990static int cma_bind_loopback(struct rdma_id_private *id_priv)
1991{
1992        struct cma_device *cma_dev, *cur_dev;
1993        struct ib_port_attr port_attr;
1994        union ib_gid gid;
1995        u16 pkey;
1996        int ret;
1997        u8 p;
1998
1999        cma_dev = NULL;
2000        mutex_lock(&lock);
2001        list_for_each_entry(cur_dev, &dev_list, list) {
2002                if (cma_family(id_priv) == AF_IB &&
2003                    rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
2004                        continue;
2005
2006                if (!cma_dev)
2007                        cma_dev = cur_dev;
2008
2009                for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
2010                        if (!ib_query_port(cur_dev->device, p, &port_attr) &&
2011                            port_attr.state == IB_PORT_ACTIVE) {
2012                                cma_dev = cur_dev;
2013                                goto port_found;
2014                        }
2015                }
2016        }
2017
2018        if (!cma_dev) {
2019                ret = -ENODEV;
2020                goto out;
2021        }
2022
2023        p = 1;
2024
2025port_found:
2026        ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
2027        if (ret)
2028                goto out;
2029
2030        ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
2031        if (ret)
2032                goto out;
2033
2034        id_priv->id.route.addr.dev_addr.dev_type =
2035                (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ?
2036                ARPHRD_INFINIBAND : ARPHRD_ETHER;
2037
2038        rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2039        ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
2040        id_priv->id.port_num = p;
2041        cma_attach_to_dev(id_priv, cma_dev);
2042        cma_set_loopback(cma_src_addr(id_priv));
2043out:
2044        mutex_unlock(&lock);
2045        return ret;
2046}
2047
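/*
 * Completion callback for rdma_resolve_ip().  On success an RDMA device
 * is acquired and the resolved source address is copied into the id; the
 * user then sees RDMA_CM_EVENT_ADDR_RESOLVED, or ADDR_ERROR on failure.
 */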
2048static void addr_handler(int status, struct sockaddr *src_addr,
2049                         struct rdma_dev_addr *dev_addr, void *context)
2050{
2051        struct rdma_id_private *id_priv = context;
2052        struct rdma_cm_event event;
2053
2054        memset(&event, 0, sizeof event);
2055        mutex_lock(&id_priv->handler_mutex);
2056        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
2057                           RDMA_CM_ADDR_RESOLVED))
2058                goto out;
2059
2060        if (!status && !id_priv->cma_dev)
2061                status = cma_acquire_dev(id_priv);
2062
2063        if (status) {
2064                if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
2065                                   RDMA_CM_ADDR_BOUND))
2066                        goto out;
2067                event.event = RDMA_CM_EVENT_ADDR_ERROR;
2068                event.status = status;
2069        } else {
2070                memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
2071                event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2072        }
2073
2074        if (id_priv->id.event_handler(&id_priv->id, &event)) {
2075                cma_exch(id_priv, RDMA_CM_DESTROYING);
2076                mutex_unlock(&id_priv->handler_mutex);
2077                cma_deref_id(id_priv);
2078                rdma_destroy_id(&id_priv->id);
2079                return;
2080        }
2081out:
2082        mutex_unlock(&id_priv->handler_mutex);
2083        cma_deref_id(id_priv);
2084}
2085
2086static int cma_resolve_loopback(struct rdma_id_private *id_priv)
2087{
2088        struct cma_work *work;
2089        union ib_gid gid;
2090        int ret;
2091
2092        work = kzalloc(sizeof *work, GFP_KERNEL);
2093        if (!work)
2094                return -ENOMEM;
2095
2096        if (!id_priv->cma_dev) {
2097                ret = cma_bind_loopback(id_priv);
2098                if (ret)
2099                        goto err;
2100        }
2101
2102        rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2103        rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
2104
2105        work->id = id_priv;
2106        INIT_WORK(&work->work, cma_work_handler);
2107        work->old_state = RDMA_CM_ADDR_QUERY;
2108        work->new_state = RDMA_CM_ADDR_RESOLVED;
2109        work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2110        queue_work(cma_wq, &work->work);
2111        return 0;
2112err:
2113        kfree(work);
2114        return ret;
2115}
2116
2117static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
2118{
2119        struct cma_work *work;
2120        int ret;
2121
2122        work = kzalloc(sizeof *work, GFP_KERNEL);
2123        if (!work)
2124                return -ENOMEM;
2125
2126        if (!id_priv->cma_dev) {
2127                ret = cma_resolve_ib_dev(id_priv);
2128                if (ret)
2129                        goto err;
2130        }
2131
2132        rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
2133                &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
2134
2135        work->id = id_priv;
2136        INIT_WORK(&work->work, cma_work_handler);
2137        work->old_state = RDMA_CM_ADDR_QUERY;
2138        work->new_state = RDMA_CM_ADDR_RESOLVED;
2139        work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2140        queue_work(cma_wq, &work->work);
2141        return 0;
2142err:
2143        kfree(work);
2144        return ret;
2145}
2146
2147static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2148                         struct sockaddr *dst_addr)
2149{
2150        if (!src_addr || !src_addr->sa_family) {
2151                src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2152                src_addr->sa_family = dst_addr->sa_family;
2153                if (dst_addr->sa_family == AF_INET6) {
2154                        ((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
2155                                ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
2156                } else if (dst_addr->sa_family == AF_IB) {
2157                        ((struct sockaddr_ib *) src_addr)->sib_pkey =
2158                                ((struct sockaddr_ib *) dst_addr)->sib_pkey;
2159                }
2160        }
2161        return rdma_bind_addr(id, src_addr);
2162}
2163
2164int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2165                      struct sockaddr *dst_addr, int timeout_ms)
2166{
2167        struct rdma_id_private *id_priv;
2168        int ret;
2169
2170        id_priv = container_of(id, struct rdma_id_private, id);
2171        if (id_priv->state == RDMA_CM_IDLE) {
2172                ret = cma_bind_addr(id, src_addr, dst_addr);
2173                if (ret)
2174                        return ret;
2175        }
2176
2177        if (cma_family(id_priv) != dst_addr->sa_family)
2178                return -EINVAL;
2179
2180        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
2181                return -EINVAL;
2182
2183        atomic_inc(&id_priv->refcount);
2184        memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
2185        if (cma_any_addr(dst_addr)) {
2186                ret = cma_resolve_loopback(id_priv);
2187        } else {
2188                if (dst_addr->sa_family == AF_IB) {
2189                        ret = cma_resolve_ib_addr(id_priv);
2190                } else {
2191                        ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
2192                                              dst_addr, &id->route.addr.dev_addr,
2193                                              timeout_ms, addr_handler, id_priv);
2194                }
2195        }
2196        if (ret)
2197                goto err;
2198
2199        return 0;
2200err:
2201        cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
2202        cma_deref_id(id_priv);
2203        return ret;
2204}
2205EXPORT_SYMBOL(rdma_resolve_addr);
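/*
 * Illustrative active-side flow (not part of this driver; the handler,
 * timeout and variable names below are placeholders).  Resolution is
 * asynchronous, so the sequence continues from the event handler:
 *
 *	static int my_cm_handler(struct rdma_cm_id *id,
 *				 struct rdma_cm_event *event)
 *	{
 *		switch (event->event) {
 *		case RDMA_CM_EVENT_ADDR_RESOLVED:
 *			return rdma_resolve_route(id, 2000);
 *		case RDMA_CM_EVENT_ROUTE_RESOLVED:
 *			return my_create_qp_and_connect(id);
 *		default:
 *			return 0;
 *		}
 *	}
 *
 *	id = rdma_create_id(my_cm_handler, my_ctx, RDMA_PS_TCP, IB_QPT_RC);
 *	ret = rdma_resolve_addr(id, NULL, (struct sockaddr *) &dst, 2000);
 *
 * Returning non-zero from the handler destroys the id, so errors from the
 * follow-on calls can simply be propagated.
 */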
2206
2207int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
2208{
2209        struct rdma_id_private *id_priv;
2210        unsigned long flags;
2211        int ret;
2212
2213        id_priv = container_of(id, struct rdma_id_private, id);
2214        spin_lock_irqsave(&id_priv->lock, flags);
2215        if (reuse || id_priv->state == RDMA_CM_IDLE) {
2216                id_priv->reuseaddr = reuse;
2217                ret = 0;
2218        } else {
2219                ret = -EINVAL;
2220        }
2221        spin_unlock_irqrestore(&id_priv->lock, flags);
2222        return ret;
2223}
2224EXPORT_SYMBOL(rdma_set_reuseaddr);
2225
2226int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
2227{
2228        struct rdma_id_private *id_priv;
2229        unsigned long flags;
2230        int ret;
2231
2232        id_priv = container_of(id, struct rdma_id_private, id);
2233        spin_lock_irqsave(&id_priv->lock, flags);
2234        if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
2235                id_priv->options |= (1 << CMA_OPTION_AFONLY);
2236                id_priv->afonly = afonly;
2237                ret = 0;
2238        } else {
2239                ret = -EINVAL;
2240        }
2241        spin_unlock_irqrestore(&id_priv->lock, flags);
2242        return ret;
2243}
2244EXPORT_SYMBOL(rdma_set_afonly);
2245
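/*
 * Port management: each port space (tcp_ps, udp_ps, ipoib_ps, ib_ps) is
 * an IDR keyed by port number.  Ids sharing a port hang off a single
 * rdma_bind_list, and cma_check_port() applies the usual bind conflict
 * rules (wildcard vs. specific address, reuseaddr, address family).
 */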
2246static void cma_bind_port(struct rdma_bind_list *bind_list,
2247                          struct rdma_id_private *id_priv)
2248{
2249        struct sockaddr *addr;
2250        struct sockaddr_ib *sib;
2251        u64 sid, mask;
2252        __be16 port;
2253
2254        addr = cma_src_addr(id_priv);
2255        port = htons(bind_list->port);
2256
2257        switch (addr->sa_family) {
2258        case AF_INET:
2259                ((struct sockaddr_in *) addr)->sin_port = port;
2260                break;
2261        case AF_INET6:
2262                ((struct sockaddr_in6 *) addr)->sin6_port = port;
2263                break;
2264        case AF_IB:
2265                sib = (struct sockaddr_ib *) addr;
2266                sid = be64_to_cpu(sib->sib_sid);
2267                mask = be64_to_cpu(sib->sib_sid_mask);
2268                sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
2269                sib->sib_sid_mask = cpu_to_be64(~0ULL);
2270                break;
2271        }
2272        id_priv->bind_list = bind_list;
2273        hlist_add_head(&id_priv->node, &bind_list->owners);
2274}
2275
2276static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
2277                          unsigned short snum)
2278{
2279        struct rdma_bind_list *bind_list;
2280        int ret;
2281
2282        bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
2283        if (!bind_list)
2284                return -ENOMEM;
2285
2286        ret = idr_alloc(ps, bind_list, snum, snum + 1, GFP_KERNEL);
2287        if (ret < 0)
2288                goto err;
2289
2290        bind_list->ps = ps;
2291        bind_list->port = (unsigned short)ret;
2292        cma_bind_port(bind_list, id_priv);
2293        return 0;
2294err:
2295        kfree(bind_list);
2296        return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
2297}
2298
2299static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
2300{
2301        static unsigned int last_used_port;
2302        int low, high, remaining;
2303        unsigned int rover;
2304
2305        inet_get_local_port_range(&low, &high);
2306        remaining = (high - low) + 1;
2307        rover = net_random() % remaining + low;
2308retry:
2309        if (last_used_port != rover &&
2310            !idr_find(ps, (unsigned short) rover)) {
2311                int ret = cma_alloc_port(ps, id_priv, rover);
2312                /*
2313                 * Remember the previously used port number in order to
2314                 * avoid re-using the same port immediately after it is closed.
2315                 */
2316                if (!ret)
2317                        last_used_port = rover;
2318                if (ret != -EADDRNOTAVAIL)
2319                        return ret;
2320        }
2321        if (--remaining) {
2322                rover++;
2323                if ((rover < low) || (rover > high))
2324                        rover = low;
2325                goto retry;
2326        }
2327        return -EADDRNOTAVAIL;
2328}
2329
2330/*
2331 * Check that the requested port is available.  This is called when trying to
2332 * bind to a specific port, or when trying to listen on a bound port.  In
2333 * the latter case, the provided id_priv may already be on the bind_list, but
2334 * we still need to check that it's okay to start listening.
2335 */
2336static int cma_check_port(struct rdma_bind_list *bind_list,
2337                          struct rdma_id_private *id_priv, uint8_t reuseaddr)
2338{
2339        struct rdma_id_private *cur_id;
2340        struct sockaddr *addr, *cur_addr;
2341
2342        addr = cma_src_addr(id_priv);
2343        hlist_for_each_entry(cur_id, &bind_list->owners, node) {
2344                if (id_priv == cur_id)
2345                        continue;
2346
2347                if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
2348                    cur_id->reuseaddr)
2349                        continue;
2350
2351                cur_addr = cma_src_addr(cur_id);
2352                if (id_priv->afonly && cur_id->afonly &&
2353                    (addr->sa_family != cur_addr->sa_family))
2354                        continue;
2355
2356                if (cma_any_addr(addr) || cma_any_addr(cur_addr))
2357                        return -EADDRNOTAVAIL;
2358
2359                if (!cma_addr_cmp(addr, cur_addr))
2360                        return -EADDRINUSE;
2361        }
2362        return 0;
2363}
2364
2365static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
2366{
2367        struct rdma_bind_list *bind_list;
2368        unsigned short snum;
2369        int ret;
2370
2371        snum = ntohs(cma_port(cma_src_addr(id_priv)));
2372        if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
2373                return -EACCES;
2374
2375        bind_list = idr_find(ps, snum);
2376        if (!bind_list) {
2377                ret = cma_alloc_port(ps, id_priv, snum);
2378        } else {
2379                ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
2380                if (!ret)
2381                        cma_bind_port(bind_list, id_priv);
2382        }
2383        return ret;
2384}
2385
2386static int cma_bind_listen(struct rdma_id_private *id_priv)
2387{
2388        struct rdma_bind_list *bind_list = id_priv->bind_list;
2389        int ret = 0;
2390
2391        mutex_lock(&lock);
2392        if (bind_list->owners.first->next)
2393                ret = cma_check_port(bind_list, id_priv, 0);
2394        mutex_unlock(&lock);
2395        return ret;
2396}
2397
2398static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv)
2399{
2400        switch (id_priv->id.ps) {
2401        case RDMA_PS_TCP:
2402                return &tcp_ps;
2403        case RDMA_PS_UDP:
2404                return &udp_ps;
2405        case RDMA_PS_IPOIB:
2406                return &ipoib_ps;
2407        case RDMA_PS_IB:
2408                return &ib_ps;
2409        default:
2410                return NULL;
2411        }
2412}
2413
2414static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv)
2415{
2416        struct idr *ps = NULL;
2417        struct sockaddr_ib *sib;
2418        u64 sid_ps, mask, sid;
2419
2420        sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
2421        mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
2422        sid = be64_to_cpu(sib->sib_sid) & mask;
2423
2424        if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
2425                sid_ps = RDMA_IB_IP_PS_IB;
2426                ps = &ib_ps;
2427        } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
2428                   (sid == (RDMA_IB_IP_PS_TCP & mask))) {
2429                sid_ps = RDMA_IB_IP_PS_TCP;
2430                ps = &tcp_ps;
2431        } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
2432                   (sid == (RDMA_IB_IP_PS_UDP & mask))) {
2433                sid_ps = RDMA_IB_IP_PS_UDP;
2434                ps = &udp_ps;
2435        }
2436
2437        if (ps) {
2438                sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
2439                sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
2440                                                be64_to_cpu(sib->sib_sid_mask));
2441        }
2442        return ps;
2443}
2444
2445static int cma_get_port(struct rdma_id_private *id_priv)
2446{
2447        struct idr *ps;
2448        int ret;
2449
2450        if (cma_family(id_priv) != AF_IB)
2451                ps = cma_select_inet_ps(id_priv);
2452        else
2453                ps = cma_select_ib_ps(id_priv);
2454        if (!ps)
2455                return -EPROTONOSUPPORT;
2456
2457        mutex_lock(&lock);
2458        if (cma_any_port(cma_src_addr(id_priv)))
2459                ret = cma_alloc_any_port(ps, id_priv);
2460        else
2461                ret = cma_use_port(ps, id_priv);
2462        mutex_unlock(&lock);
2463
2464        return ret;
2465}
2466
2467static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
2468                               struct sockaddr *addr)
2469{
2470#if IS_ENABLED(CONFIG_IPV6)
2471        struct sockaddr_in6 *sin6;
2472
2473        if (addr->sa_family != AF_INET6)
2474                return 0;
2475
2476        sin6 = (struct sockaddr_in6 *) addr;
2477        if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
2478            !sin6->sin6_scope_id)
2479                return -EINVAL;
2480
2481        dev_addr->bound_dev_if = sin6->sin6_scope_id;
2482#endif
2483        return 0;
2484}
2485
2486int rdma_listen(struct rdma_cm_id *id, int backlog)
2487{
2488        struct rdma_id_private *id_priv;
2489        int ret;
2490
2491        id_priv = container_of(id, struct rdma_id_private, id);
2492        if (id_priv->state == RDMA_CM_IDLE) {
2493                id->route.addr.src_addr.ss_family = AF_INET;
2494                ret = rdma_bind_addr(id, cma_src_addr(id_priv));
2495                if (ret)
2496                        return ret;
2497        }
2498
2499        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
2500                return -EINVAL;
2501
2502        if (id_priv->reuseaddr) {
2503                ret = cma_bind_listen(id_priv);
2504                if (ret)
2505                        goto err;
2506        }
2507
2508        id_priv->backlog = backlog;
2509        if (id->device) {
2510                switch (rdma_node_get_transport(id->device->node_type)) {
2511                case RDMA_TRANSPORT_IB:
2512                        ret = cma_ib_listen(id_priv);
2513                        if (ret)
2514                                goto err;
2515                        break;
2516                case RDMA_TRANSPORT_IWARP:
2517                        ret = cma_iw_listen(id_priv, backlog);
2518                        if (ret)
2519                                goto err;
2520                        break;
2521                default:
2522                        ret = -ENOSYS;
2523                        goto err;
2524                }
2525        } else
2526                cma_listen_on_all(id_priv);
2527
2528        return 0;
2529err:
2530        id_priv->backlog = 0;
2531        cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
2532        return ret;
2533}
2534EXPORT_SYMBOL(rdma_listen);
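/*
 * Illustrative passive-side flow (not part of this driver; the handler
 * name, port and backlog are placeholders):
 *
 *	struct sockaddr_in addr = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(7174),
 *	};
 *
 *	listen_id = rdma_create_id(my_listen_handler, my_ctx,
 *				   RDMA_PS_TCP, IB_QPT_RC);
 *	ret = rdma_bind_addr(listen_id, (struct sockaddr *) &addr);
 *	ret = rdma_listen(listen_id, 16);
 *
 * Each incoming request surfaces as RDMA_CM_EVENT_CONNECT_REQUEST on a
 * newly created child id; the handler typically creates a QP and calls
 * rdma_accept() on that child id, or rdma_reject() to refuse it.
 */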
2535
2536int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2537{
2538        struct rdma_id_private *id_priv;
2539        int ret;
2540
2541        if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
2542            addr->sa_family != AF_IB)
2543                return -EAFNOSUPPORT;
2544
2545        id_priv = container_of(id, struct rdma_id_private, id);
2546        if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
2547                return -EINVAL;
2548
2549        ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
2550        if (ret)
2551                goto err1;
2552
2553        if (!cma_any_addr(addr)) {
2554                ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
2555                if (ret)
2556                        goto err1;
2557
2558                ret = cma_acquire_dev(id_priv);
2559                if (ret)
2560                        goto err1;
2561        }
2562
2563        memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
2564        if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
2565                if (addr->sa_family == AF_INET)
2566                        id_priv->afonly = 1;
2567#if IS_ENABLED(CONFIG_IPV6)
2568                else if (addr->sa_family == AF_INET6)
2569                        id_priv->afonly = init_net.ipv6.sysctl.bindv6only;
2570#endif
2571        }
2572        ret = cma_get_port(id_priv);
2573        if (ret)
2574                goto err2;
2575
2576        return 0;
2577err2:
2578        if (id_priv->cma_dev)
2579                cma_release_dev(id_priv);
2580err1:
2581        cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
2582        return ret;
2583}
2584EXPORT_SYMBOL(rdma_bind_addr);
2585
2586static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
2587{
2588        struct cma_hdr *cma_hdr;
2589
2590        cma_hdr = hdr;
2591        cma_hdr->cma_version = CMA_VERSION;
2592        if (cma_family(id_priv) == AF_INET) {
2593                struct sockaddr_in *src4, *dst4;
2594
2595                src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
2596                dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
2597
2598                cma_set_ip_ver(cma_hdr, 4);
2599                cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2600                cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2601                cma_hdr->port = src4->sin_port;
2602        } else if (cma_family(id_priv) == AF_INET6) {
2603                struct sockaddr_in6 *src6, *dst6;
2604
2605                src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
2606                dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
2607
2608                cma_set_ip_ver(cma_hdr, 6);
2609                cma_hdr->src_addr.ip6 = src6->sin6_addr;
2610                cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
2611                cma_hdr->port = src6->sin6_port;
2612        }
2613        return 0;
2614}
2615
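/*
 * Datagram (UD) connections use the IB SIDR protocol rather than a full
 * CM exchange: cma_resolve_ib_udp() below sends the SIDR REQ, and this
 * handler converts the SIDR REP into RDMA_CM_EVENT_ESTABLISHED, returning
 * the remote QPN, Q_Key and address handle through event.param.ud.
 */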
2616static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2617                                struct ib_cm_event *ib_event)
2618{
2619        struct rdma_id_private *id_priv = cm_id->context;
2620        struct rdma_cm_event event;
2621        struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2622        int ret = 0;
2623
2624        if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
2625                return 0;
2626
2627        memset(&event, 0, sizeof event);
2628        switch (ib_event->event) {
2629        case IB_CM_SIDR_REQ_ERROR:
2630                event.event = RDMA_CM_EVENT_UNREACHABLE;
2631                event.status = -ETIMEDOUT;
2632                break;
2633        case IB_CM_SIDR_REP_RECEIVED:
2634                event.param.ud.private_data = ib_event->private_data;
2635                event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2636                if (rep->status != IB_SIDR_SUCCESS) {
2637                        event.event = RDMA_CM_EVENT_UNREACHABLE;
2638                        event.status = ib_event->param.sidr_rep_rcvd.status;
2639                        break;
2640                }
2641                ret = cma_set_qkey(id_priv, rep->qkey);
2642                if (ret) {
2643                        event.event = RDMA_CM_EVENT_ADDR_ERROR;
2644                        event.status = ret;
2645                        break;
2646                }
2647                ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2648                                     id_priv->id.route.path_rec,
2649                                     &event.param.ud.ah_attr);
2650                event.param.ud.qp_num = rep->qpn;
2651                event.param.ud.qkey = rep->qkey;
2652                event.event = RDMA_CM_EVENT_ESTABLISHED;
2653                event.status = 0;
2654                break;
2655        default:
2656                printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
2657                       ib_event->event);
2658                goto out;
2659        }
2660
2661        ret = id_priv->id.event_handler(&id_priv->id, &event);
2662        if (ret) {
2663                /* Destroy the CM ID by returning a non-zero value. */
2664                id_priv->cm_id.ib = NULL;
2665                cma_exch(id_priv, RDMA_CM_DESTROYING);
2666                mutex_unlock(&id_priv->handler_mutex);
2667                rdma_destroy_id(&id_priv->id);
2668                return ret;
2669        }
2670out:
2671        mutex_unlock(&id_priv->handler_mutex);
2672        return ret;
2673}
2674
2675static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2676                              struct rdma_conn_param *conn_param)
2677{
2678        struct ib_cm_sidr_req_param req;
2679        struct ib_cm_id *id;
2680        void *private_data;
2681        int offset, ret;
2682
2683        memset(&req, 0, sizeof req);
2684        offset = cma_user_data_offset(id_priv);
2685        req.private_data_len = offset + conn_param->private_data_len;
2686        if (req.private_data_len < conn_param->private_data_len)
2687                return -EINVAL;
2688
2689        if (req.private_data_len) {
2690                private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2691                if (!private_data)
2692                        return -ENOMEM;
2693        } else {
2694                private_data = NULL;
2695        }
2696
2697        if (conn_param->private_data && conn_param->private_data_len)
2698                memcpy(private_data + offset, conn_param->private_data,
2699                       conn_param->private_data_len);
2700
2701        if (private_data) {
2702                ret = cma_format_hdr(private_data, id_priv);
2703                if (ret)
2704                        goto out;
2705                req.private_data = private_data;
2706        }
2707
2708        id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
2709                             id_priv);
2710        if (IS_ERR(id)) {
2711                ret = PTR_ERR(id);
2712                goto out;
2713        }
2714        id_priv->cm_id.ib = id;
2715
2716        req.path = id_priv->id.route.path_rec;
2717        req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
2718        req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2719        req.max_cm_retries = CMA_MAX_CM_RETRIES;
2720
2721        ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2722        if (ret) {
2723                ib_destroy_cm_id(id_priv->cm_id.ib);
2724                id_priv->cm_id.ib = NULL;
2725        }
2726out:
2727        kfree(private_data);
2728        return ret;
2729}
2730
2731static int cma_connect_ib(struct rdma_id_private *id_priv,
2732                          struct rdma_conn_param *conn_param)
2733{
2734        struct ib_cm_req_param req;
2735        struct rdma_route *route;
2736        void *private_data;
2737        struct ib_cm_id *id;
2738        int offset, ret;
2739
2740        memset(&req, 0, sizeof req);
2741        offset = cma_user_data_offset(id_priv);
2742        req.private_data_len = offset + conn_param->private_data_len;
2743        if (req.private_data_len < conn_param->private_data_len)
2744                return -EINVAL;
2745
2746        if (req.private_data_len) {
2747                private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2748                if (!private_data)
2749                        return -ENOMEM;
2750        } else {
2751                private_data = NULL;
2752        }
2753
2754        if (conn_param->private_data && conn_param->private_data_len)
2755                memcpy(private_data + offset, conn_param->private_data,
2756                       conn_param->private_data_len);
2757
2758        id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv);
2759        if (IS_ERR(id)) {
2760                ret = PTR_ERR(id);
2761                goto out;
2762        }
2763        id_priv->cm_id.ib = id;
2764
2765        route = &id_priv->id.route;
2766        if (private_data) {
2767                ret = cma_format_hdr(private_data, id_priv);
2768                if (ret)
2769                        goto out;
2770                req.private_data = private_data;
2771        }
2772
2773        req.primary_path = &route->path_rec[0];
2774        if (route->num_paths == 2)
2775                req.alternate_path = &route->path_rec[1];
2776
2777        req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
2778        req.qp_num = id_priv->qp_num;
2779        req.qp_type = id_priv->id.qp_type;
2780        req.starting_psn = id_priv->seq_num;
2781        req.responder_resources = conn_param->responder_resources;
2782        req.initiator_depth = conn_param->initiator_depth;
2783        req.flow_control = conn_param->flow_control;
2784        req.retry_count = min_t(u8, 7, conn_param->retry_count);
2785        req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
2786        req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2787        req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2788        req.max_cm_retries = CMA_MAX_CM_RETRIES;
2789        req.srq = id_priv->srq ? 1 : 0;
2790
2791        ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2792out:
2793        if (ret && !IS_ERR(id)) {
2794                ib_destroy_cm_id(id);
2795                id_priv->cm_id.ib = NULL;
2796        }
2797
2798        kfree(private_data);
2799        return ret;
2800}
2801
2802static int cma_connect_iw(struct rdma_id_private *id_priv,
2803                          struct rdma_conn_param *conn_param)
2804{
2805        struct iw_cm_id *cm_id;
2806        struct sockaddr_in *sin;
2807        int ret;
2808        struct iw_cm_conn_param iw_param;
2809
2810        cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
2811        if (IS_ERR(cm_id))
2812                return PTR_ERR(cm_id);
2813
2814        id_priv->cm_id.iw = cm_id;
2815
2816        sin = (struct sockaddr_in *) cma_src_addr(id_priv);
2817        cm_id->local_addr = *sin;
2818
2819        sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
2820        cm_id->remote_addr = *sin;
2821
2822        ret = cma_modify_qp_rtr(id_priv, conn_param);
2823        if (ret)
2824                goto out;
2825
2826        if (conn_param) {
2827                iw_param.ord = conn_param->initiator_depth;
2828                iw_param.ird = conn_param->responder_resources;
2829                iw_param.private_data = conn_param->private_data;
2830                iw_param.private_data_len = conn_param->private_data_len;
2831                iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
2832        } else {
2833                memset(&iw_param, 0, sizeof iw_param);
2834                iw_param.qpn = id_priv->qp_num;
2835        }
2836        ret = iw_cm_connect(cm_id, &iw_param);
2837out:
2838        if (ret) {
2839                iw_destroy_cm_id(cm_id);
2840                id_priv->cm_id.iw = NULL;
2841        }
2842        return ret;
2843}
2844
2845int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2846{
2847        struct rdma_id_private *id_priv;
2848        int ret;
2849
2850        id_priv = container_of(id, struct rdma_id_private, id);
2851        if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
2852                return -EINVAL;
2853
2854        if (!id->qp) {
2855                id_priv->qp_num = conn_param->qp_num;
2856                id_priv->srq = conn_param->srq;
2857        }
2858
2859        switch (rdma_node_get_transport(id->device->node_type)) {
2860        case RDMA_TRANSPORT_IB:
2861                if (id->qp_type == IB_QPT_UD)
2862                        ret = cma_resolve_ib_udp(id_priv, conn_param);
2863                else
2864                        ret = cma_connect_ib(id_priv, conn_param);
2865                break;
2866        case RDMA_TRANSPORT_IWARP:
2867                ret = cma_connect_iw(id_priv, conn_param);
2868                break;
2869        default:
2870                ret = -ENOSYS;
2871                break;
2872        }
2873        if (ret)
2874                goto err;
2875
2876        return 0;
2877err:
2878        cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
2879        return ret;
2880}
2881EXPORT_SYMBOL(rdma_connect);
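/*
 * Illustrative rdma_conn_param setup for an RC connection (the values and
 * the my_hello payload are placeholders; callers normally clamp
 * initiator_depth and responder_resources to the device's limits):
 *
 *	struct rdma_conn_param conn_param;
 *
 *	memset(&conn_param, 0, sizeof conn_param);
 *	conn_param.responder_resources = 1;
 *	conn_param.initiator_depth = 1;
 *	conn_param.retry_count = 7;
 *	conn_param.rnr_retry_count = 7;
 *	conn_param.private_data = &my_hello;
 *	conn_param.private_data_len = sizeof(my_hello);
 *	ret = rdma_connect(id, &conn_param);
 */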
2882
2883static int cma_accept_ib(struct rdma_id_private *id_priv,
2884                         struct rdma_conn_param *conn_param)
2885{
2886        struct ib_cm_rep_param rep;
2887        int ret;
2888
2889        ret = cma_modify_qp_rtr(id_priv, conn_param);
2890        if (ret)
2891                goto out;
2892
2893        ret = cma_modify_qp_rts(id_priv, conn_param);
2894        if (ret)
2895                goto out;
2896
2897        memset(&rep, 0, sizeof rep);
2898        rep.qp_num = id_priv->qp_num;
2899        rep.starting_psn = id_priv->seq_num;
2900        rep.private_data = conn_param->private_data;
2901        rep.private_data_len = conn_param->private_data_len;
2902        rep.responder_resources = conn_param->responder_resources;
2903        rep.initiator_depth = conn_param->initiator_depth;
2904        rep.failover_accepted = 0;
2905        rep.flow_control = conn_param->flow_control;
2906        rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
2907        rep.srq = id_priv->srq ? 1 : 0;
2908
2909        ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2910out:
2911        return ret;
2912}
2913
2914static int cma_accept_iw(struct rdma_id_private *id_priv,
2915                  struct rdma_conn_param *conn_param)
2916{
2917        struct iw_cm_conn_param iw_param;
2918        int ret;
2919
2920        ret = cma_modify_qp_rtr(id_priv, conn_param);
2921        if (ret)
2922                return ret;
2923
2924        iw_param.ord = conn_param->initiator_depth;
2925        iw_param.ird = conn_param->responder_resources;
2926        iw_param.private_data = conn_param->private_data;
2927        iw_param.private_data_len = conn_param->private_data_len;
2928        if (id_priv->id.qp)
2929                iw_param.qpn = id_priv->qp_num;
2930        else
2931                iw_param.qpn = conn_param->qp_num;
2932
2933        return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2934}
2935
2936static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2937                             enum ib_cm_sidr_status status, u32 qkey,
2938                             const void *private_data, int private_data_len)
2939{
2940        struct ib_cm_sidr_rep_param rep;
2941        int ret;
2942
2943        memset(&rep, 0, sizeof rep);
2944        rep.status = status;
2945        if (status == IB_SIDR_SUCCESS) {
2946                ret = cma_set_qkey(id_priv, qkey);
2947                if (ret)
2948                        return ret;
2949                rep.qp_num = id_priv->qp_num;
2950                rep.qkey = id_priv->qkey;
2951        }
2952        rep.private_data = private_data;
2953        rep.private_data_len = private_data_len;
2954
2955        return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2956}
2957
2958int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2959{
2960        struct rdma_id_private *id_priv;
2961        int ret;
2962
2963        id_priv = container_of(id, struct rdma_id_private, id);
2964
2965        id_priv->owner = task_pid_nr(current);
2966
2967        if (!cma_comp(id_priv, RDMA_CM_CONNECT))
2968                return -EINVAL;
2969
2970        if (!id->qp && conn_param) {
2971                id_priv->qp_num = conn_param->qp_num;
2972                id_priv->srq = conn_param->srq;
2973        }
2974
2975        switch (rdma_node_get_transport(id->device->node_type)) {
2976        case RDMA_TRANSPORT_IB:
2977                if (id->qp_type == IB_QPT_UD) {
2978                        if (conn_param)
2979                                ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2980                                                        conn_param->qkey,
2981                                                        conn_param->private_data,
2982                                                        conn_param->private_data_len);
2983                        else
2984                                ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2985                                                        0, NULL, 0);
2986                } else {
2987                        if (conn_param)
2988                                ret = cma_accept_ib(id_priv, conn_param);
2989                        else
2990                                ret = cma_rep_recv(id_priv);
2991                }
2992                break;
2993        case RDMA_TRANSPORT_IWARP:
2994                ret = cma_accept_iw(id_priv, conn_param);
2995                break;
2996        default:
2997                ret = -ENOSYS;
2998                break;
2999        }
3000
3001        if (ret)
3002                goto reject;
3003
3004        return 0;
3005reject:
3006        cma_modify_qp_err(id_priv);
3007        rdma_reject(id, NULL, 0);
3008        return ret;
3009}
3010EXPORT_SYMBOL(rdma_accept);
3011
3012int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
3013{
3014        struct rdma_id_private *id_priv;
3015        int ret;
3016
3017        id_priv = container_of(id, struct rdma_id_private, id);
3018        if (!id_priv->cm_id.ib)
3019                return -EINVAL;
3020
3021        switch (id->device->node_type) {
3022        case RDMA_NODE_IB_CA:
3023                ret = ib_cm_notify(id_priv->cm_id.ib, event);
3024                break;
3025        default:
3026                ret = 0;
3027                break;
3028        }
3029        return ret;
3030}
3031EXPORT_SYMBOL(rdma_notify);
3032
3033int rdma_reject(struct rdma_cm_id *id, const void *private_data,
3034                u8 private_data_len)
3035{
3036        struct rdma_id_private *id_priv;
3037        int ret;
3038
3039        id_priv = container_of(id, struct rdma_id_private, id);
3040        if (!id_priv->cm_id.ib)
3041                return -EINVAL;
3042
3043        switch (rdma_node_get_transport(id->device->node_type)) {
3044        case RDMA_TRANSPORT_IB:
3045                if (id->qp_type == IB_QPT_UD)
3046                        ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
3047                                                private_data, private_data_len);
3048                else
3049                        ret = ib_send_cm_rej(id_priv->cm_id.ib,
3050                                             IB_CM_REJ_CONSUMER_DEFINED, NULL,
3051                                             0, private_data, private_data_len);
3052                break;
3053        case RDMA_TRANSPORT_IWARP:
3054                ret = iw_cm_reject(id_priv->cm_id.iw,
3055                                   private_data, private_data_len);
3056                break;
3057        default:
3058                ret = -ENOSYS;
3059                break;
3060        }
3061        return ret;
3062}
3063EXPORT_SYMBOL(rdma_reject);
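/*
 * rdma_reject() is the counterpart to rdma_accept() in a connect-request
 * handler.  On UD port spaces the rejection is delivered as a SIDR REP with
 * status IB_SIDR_REJECT; for connected QPs it goes out as a consumer-defined
 * CM REJ.  The child id itself is released either by rdma_destroy_id() or by
 * returning non-zero from the handler.
 */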
3064
3065int rdma_disconnect(struct rdma_cm_id *id)
3066{
3067        struct rdma_id_private *id_priv;
3068        int ret;
3069
3070        id_priv = container_of(id, struct rdma_id_private, id);
3071        if (!id_priv->cm_id.ib)
3072                return -EINVAL;
3073
3074        switch (rdma_node_get_transport(id->device->node_type)) {
3075        case RDMA_TRANSPORT_IB:
3076                ret = cma_modify_qp_err(id_priv);
3077                if (ret)
3078                        goto out;
3079                /* Initiate or respond to a disconnect. */
3080                if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
3081                        ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
3082                break;
3083        case RDMA_TRANSPORT_IWARP:
3084                ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
3085                break;
3086        default:
3087                ret = -EINVAL;
3088                break;
3089        }
3090out:
3091        return ret;
3092}
3093EXPORT_SYMBOL(rdma_disconnect);
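/*
 * Typical teardown sequence on either peer, sketched for reference:
 *
 *	rdma_disconnect(id);	(DREQ on IB, disconnect on iWARP)
 *	wait for RDMA_CM_EVENT_DISCONNECTED on the id
 *	rdma_destroy_qp(id);
 *	rdma_destroy_id(id);
 *
 * The remote side sees RDMA_CM_EVENT_DISCONNECTED as well and normally
 * answers with its own rdma_disconnect().
 */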
3094
3095static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
3096{
3097        struct rdma_id_private *id_priv;
3098        struct cma_multicast *mc = multicast->context;
3099        struct rdma_cm_event event;
3100        int ret;
3101
3102        id_priv = mc->id_priv;
3103        if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
3104            cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
3105                return 0;
3106
3107        if (!status)
3108                status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
3109        mutex_lock(&id_priv->qp_mutex);
3110        if (!status && id_priv->id.qp)
3111                status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
3112                                         be16_to_cpu(multicast->rec.mlid));
3113        mutex_unlock(&id_priv->qp_mutex);
3114
3115        memset(&event, 0, sizeof event);
3116        event.status = status;
3117        event.param.ud.private_data = mc->context;
3118        if (!status) {
3119                event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
3120                ib_init_ah_from_mcmember(id_priv->id.device,
3121                                         id_priv->id.port_num, &multicast->rec,
3122                                         &event.param.ud.ah_attr);
3123                event.param.ud.qp_num = 0xFFFFFF;
3124                event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
3125        } else
3126                event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
3127
3128        ret = id_priv->id.event_handler(&id_priv->id, &event);
3129        if (ret) {
3130                cma_exch(id_priv, RDMA_CM_DESTROYING);
3131                mutex_unlock(&id_priv->handler_mutex);
3132                rdma_destroy_id(&id_priv->id);
3133                return 0;
3134        }
3135
3136        mutex_unlock(&id_priv->handler_mutex);
3137        return 0;
3138}
3139
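/*
 * Derive the MGID to join from the caller-supplied address: a wildcard
 * address maps to the zero MGID, an AF_IB address or an IPv6 address in the
 * SA-assigned range is used verbatim, and ordinary IPv4/IPv6 groups go
 * through the IPoIB multicast mapping, with the signature byte overridden
 * for RDMA_PS_UDP so RDMA CM joins stay distinct from IPoIB's own groups.
 */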
3140static void cma_set_mgid(struct rdma_id_private *id_priv,
3141                         struct sockaddr *addr, union ib_gid *mgid)
3142{
3143        unsigned char mc_map[MAX_ADDR_LEN];
3144        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3145        struct sockaddr_in *sin = (struct sockaddr_in *) addr;
3146        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
3147
3148        if (cma_any_addr(addr)) {
3149                memset(mgid, 0, sizeof *mgid);
3150        } else if ((addr->sa_family == AF_INET6) &&
3151                   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
3152                                                                 0xFF10A01B)) {
3153                /* IPv6 address is an SA assigned MGID. */
3154                memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3155        } else if (addr->sa_family == AF_IB) {
3156                memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
3157        } else if (addr->sa_family == AF_INET6) {
3158                ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
3159                if (id_priv->id.ps == RDMA_PS_UDP)
3160                        mc_map[7] = 0x01;       /* Use RDMA CM signature */
3161                *mgid = *(union ib_gid *) (mc_map + 4);
3162        } else {
3163                ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
3164                if (id_priv->id.ps == RDMA_PS_UDP)
3165                        mc_map[7] = 0x01;       /* Use RDMA CM signature */
3166                *mgid = *(union ib_gid *) (mc_map + 4);
3167        }
3168}
3169
3170static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3171                                 struct cma_multicast *mc)
3172{
3173        struct ib_sa_mcmember_rec rec;
3174        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3175        ib_sa_comp_mask comp_mask;
3176        int ret;
3177
3178        ib_addr_get_mgid(dev_addr, &rec.mgid);
3179        ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
3180                                     &rec.mgid, &rec);
3181        if (ret)
3182                return ret;
3183
3184        ret = cma_set_qkey(id_priv, 0);
3185        if (ret)
3186                return ret;
3187
3188        cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
3189        rec.qkey = cpu_to_be32(id_priv->qkey);
3190        rdma_addr_get_sgid(dev_addr, &rec.port_gid);
3191        rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
3192        rec.join_state = 1;
3193
3194        comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
3195                    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
3196                    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
3197                    IB_SA_MCMEMBER_REC_FLOW_LABEL |
3198                    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
3199
3200        if (id_priv->id.ps == RDMA_PS_IPOIB)
3201                comp_mask |= IB_SA_MCMEMBER_REC_RATE |
3202                             IB_SA_MCMEMBER_REC_RATE_SELECTOR |
3203                             IB_SA_MCMEMBER_REC_MTU_SELECTOR |
3204                             IB_SA_MCMEMBER_REC_MTU |
3205                             IB_SA_MCMEMBER_REC_HOP_LIMIT;
3206
3207        mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
3208                                                id_priv->id.port_num, &rec,
3209                                                comp_mask, GFP_KERNEL,
3210                                                cma_ib_mc_handler, mc);
3211        return PTR_RET(mc->multicast.ib);
3212}
3213
3214static void iboe_mcast_work_handler(struct work_struct *work)
3215{
3216        struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
3217        struct cma_multicast *mc = mw->mc;
3218        struct ib_sa_multicast *m = mc->multicast.ib;
3219
3220        mc->multicast.ib->context = mc;
3221        cma_ib_mc_handler(0, m);
3222        kref_put(&mc->mcref, release_mc);
3223        kfree(mw);
3224}
3225
3226static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
3227{
3228        struct sockaddr_in *sin = (struct sockaddr_in *)addr;
3229        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
3230
3231        if (cma_any_addr(addr)) {
3232                memset(mgid, 0, sizeof *mgid);
3233        } else if (addr->sa_family == AF_INET6) {
3234                memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3235        } else {
3236                mgid->raw[0] = 0xff;
3237                mgid->raw[1] = 0x0e;
3238                mgid->raw[2] = 0;
3239                mgid->raw[3] = 0;
3240                mgid->raw[4] = 0;
3241                mgid->raw[5] = 0;
3242                mgid->raw[6] = 0;
3243                mgid->raw[7] = 0;
3244                mgid->raw[8] = 0;
3245                mgid->raw[9] = 0;
3246                mgid->raw[10] = 0xff;
3247                mgid->raw[11] = 0xff;
3248                *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
3249        }
3250}
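/*
 * Example of the IPv4 branch above (illustrative): joining 239.1.1.1 over a
 * RoCE port produces the MGID ff0e:0000:0000:0000:0000:ffff:ef01:0101,
 * i.e. ff0e::ffff:239.1.1.1.
 */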
3251
3252static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
3253                                   struct cma_multicast *mc)
3254{
3255        struct iboe_mcast_work *work;
3256        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3257        int err;
3258        struct sockaddr *addr = (struct sockaddr *)&mc->addr;
3259        struct net_device *ndev = NULL;
3260
3261        if (cma_zero_addr((struct sockaddr *)&mc->addr))
3262                return -EINVAL;
3263
3264        work = kzalloc(sizeof *work, GFP_KERNEL);
3265        if (!work)
3266                return -ENOMEM;
3267
3268        mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
3269        if (!mc->multicast.ib) {
3270                err = -ENOMEM;
3271                goto out1;
3272        }
3273
3274        cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
3275
3276        mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
3277        if (id_priv->id.ps == RDMA_PS_UDP)
3278                mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3279
3280        if (dev_addr->bound_dev_if)
3281                ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
3282        if (!ndev) {
3283                err = -ENODEV;
3284                goto out2;
3285        }
3286        mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
3287        mc->multicast.ib->rec.hop_limit = 1;
3288        mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
3289        dev_put(ndev);
3290        if (!mc->multicast.ib->rec.mtu) {
3291                err = -EINVAL;
3292                goto out2;
3293        }
3294        iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
3295        work->id = id_priv;
3296        work->mc = mc;
3297        INIT_WORK(&work->work, iboe_mcast_work_handler);
3298        kref_get(&mc->mcref);
3299        queue_work(cma_wq, &work->work);
3300
3301        return 0;
3302
3303out2:
3304        kfree(mc->multicast.ib);
3305out1:
3306        kfree(work);
3307        return err;
3308}
3309
3310int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
3311                        void *context)
3312{
3313        struct rdma_id_private *id_priv;
3314        struct cma_multicast *mc;
3315        int ret;
3316
3317        id_priv = container_of(id, struct rdma_id_private, id);
3318        if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
3319            !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
3320                return -EINVAL;
3321
3322        mc = kmalloc(sizeof *mc, GFP_KERNEL);
3323        if (!mc)
3324                return -ENOMEM;
3325
3326        memcpy(&mc->addr, addr, rdma_addr_size(addr));
3327        mc->context = context;
3328        mc->id_priv = id_priv;
3329
3330        spin_lock(&id_priv->lock);
3331        list_add(&mc->list, &id_priv->mc_list);
3332        spin_unlock(&id_priv->lock);
3333
3334        switch (rdma_node_get_transport(id->device->node_type)) {
3335        case RDMA_TRANSPORT_IB:
3336                switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3337                case IB_LINK_LAYER_INFINIBAND:
3338                        ret = cma_join_ib_multicast(id_priv, mc);
3339                        break;
3340                case IB_LINK_LAYER_ETHERNET:
3341                        kref_init(&mc->mcref);
3342                        ret = cma_iboe_join_multicast(id_priv, mc);
3343                        break;
3344                default:
3345                        ret = -EINVAL;
3346                }
3347                break;
3348        default:
3349                ret = -ENOSYS;
3350                break;
3351        }
3352
3353        if (ret) {
3354                spin_lock_irq(&id_priv->lock);
3355                list_del(&mc->list);
3356                spin_unlock_irq(&id_priv->lock);
3357                kfree(mc);
3358        }
3359        return ret;
3360}
3361EXPORT_SYMBOL(rdma_join_multicast);
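/*
 * Illustrative UD flow, not part of this file (grp and my_ctx are
 * hypothetical): once rdma_resolve_addr() has completed on an RDMA_PS_UDP
 * id with a UD QP attached,
 *
 *	ret = rdma_join_multicast(id, (struct sockaddr *)&grp, my_ctx);
 *
 * and the subsequent RDMA_CM_EVENT_MULTICAST_JOIN event carries, in
 * param.ud, the ah_attr, qkey and remote qp_num (0xffffff) needed to
 * address sends to the group; my_ctx is echoed back as
 * param.ud.private_data.  rdma_leave_multicast(id, (struct sockaddr *)&grp)
 * undoes the join and detaches the QP.
 */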
3362
3363void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
3364{
3365        struct rdma_id_private *id_priv;
3366        struct cma_multicast *mc;
3367
3368        id_priv = container_of(id, struct rdma_id_private, id);
3369        spin_lock_irq(&id_priv->lock);
3370        list_for_each_entry(mc, &id_priv->mc_list, list) {
3371                if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
3372                        list_del(&mc->list);
3373                        spin_unlock_irq(&id_priv->lock);
3374
3375                        if (id->qp)
3376                                ib_detach_mcast(id->qp,
3377                                                &mc->multicast.ib->rec.mgid,
3378                                                be16_to_cpu(mc->multicast.ib->rec.mlid));
3379                        if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
3380                                switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3381                                case IB_LINK_LAYER_INFINIBAND:
3382                                        ib_sa_free_multicast(mc->multicast.ib);
3383                                        kfree(mc);
3384                                        break;
3385                                case IB_LINK_LAYER_ETHERNET:
3386                                        kref_put(&mc->mcref, release_mc);
3387                                        break;
3388                                default:
3389                                        break;
3390                                }
3391                        }
3392                        return;
3393                }
3394        }
3395        spin_unlock_irq(&id_priv->lock);
3396}
3397EXPORT_SYMBOL(rdma_leave_multicast);
3398
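/*
 * Bonding failover handling: if a failover on a bonding master changes the
 * hardware address backing an id bound to that interface, queue an
 * RDMA_CM_EVENT_ADDR_CHANGE so the ULP can re-resolve its addresses.
 */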
3399static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
3400{
3401        struct rdma_dev_addr *dev_addr;
3402        struct cma_ndev_work *work;
3403
3404        dev_addr = &id_priv->id.route.addr.dev_addr;
3405
3406        if ((dev_addr->bound_dev_if == ndev->ifindex) &&
3407            memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
3408                printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
3409                       ndev->name, &id_priv->id);
3410                work = kzalloc(sizeof *work, GFP_KERNEL);
3411                if (!work)
3412                        return -ENOMEM;
3413
3414                INIT_WORK(&work->work, cma_ndev_work_handler);
3415                work->id = id_priv;
3416                work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
3417                atomic_inc(&id_priv->refcount);
3418                queue_work(cma_wq, &work->work);
3419        }
3420
3421        return 0;
3422}
3423
3424static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
3425                               void *ptr)
3426{
3427        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
3428        struct cma_device *cma_dev;
3429        struct rdma_id_private *id_priv;
3430        int ret = NOTIFY_DONE;
3431
3432        if (dev_net(ndev) != &init_net)
3433                return NOTIFY_DONE;
3434
3435        if (event != NETDEV_BONDING_FAILOVER)
3436                return NOTIFY_DONE;
3437
3438        if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
3439                return NOTIFY_DONE;
3440
3441        mutex_lock(&lock);
3442        list_for_each_entry(cma_dev, &dev_list, list)
3443                list_for_each_entry(id_priv, &cma_dev->id_list, list) {
3444                        ret = cma_netdev_change(ndev, id_priv);
3445                        if (ret)
3446                                goto out;
3447                }
3448
3449out:
3450        mutex_unlock(&lock);
3451        return ret;
3452}
3453
3454static struct notifier_block cma_nb = {
3455        .notifier_call = cma_netdev_callback
3456};
3457
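/*
 * Device hotplug: each new ib_device is wrapped in a cma_device, and every
 * id listening on the wildcard address (listen_any_list) gets a
 * device-specific listener on it, so rdma_listen() callers transparently
 * cover ports that show up later.
 */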
3458static void cma_add_one(struct ib_device *device)
3459{
3460        struct cma_device *cma_dev;
3461        struct rdma_id_private *id_priv;
3462
3463        cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
3464        if (!cma_dev)
3465                return;
3466
3467        cma_dev->device = device;
3468
3469        init_completion(&cma_dev->comp);
3470        atomic_set(&cma_dev->refcount, 1);
3471        INIT_LIST_HEAD(&cma_dev->id_list);
3472        ib_set_client_data(device, &cma_client, cma_dev);
3473
3474        mutex_lock(&lock);
3475        list_add_tail(&cma_dev->list, &dev_list);
3476        list_for_each_entry(id_priv, &listen_any_list, list)
3477                cma_listen_on_dev(id_priv, cma_dev);
3478        mutex_unlock(&lock);
3479}
3480
3481static int cma_remove_id_dev(struct rdma_id_private *id_priv)
3482{
3483        struct rdma_cm_event event;
3484        enum rdma_cm_state state;
3485        int ret = 0;
3486
3487        /* Record that we want to remove the device */
3488        state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
3489        if (state == RDMA_CM_DESTROYING)
3490                return 0;
3491
3492        cma_cancel_operation(id_priv, state);
3493        mutex_lock(&id_priv->handler_mutex);
3494
3495        /* Check for destruction from another callback. */
3496        if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
3497                goto out;
3498
3499        memset(&event, 0, sizeof event);
3500        event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
3501        ret = id_priv->id.event_handler(&id_priv->id, &event);
3502out:
3503        mutex_unlock(&id_priv->handler_mutex);
3504        return ret;
3505}
3506
3507static void cma_process_remove(struct cma_device *cma_dev)
3508{
3509        struct rdma_id_private *id_priv;
3510        int ret;
3511
3512        mutex_lock(&lock);
3513        while (!list_empty(&cma_dev->id_list)) {
3514                id_priv = list_entry(cma_dev->id_list.next,
3515                                     struct rdma_id_private, list);
3516
3517                list_del(&id_priv->listen_list);
3518                list_del_init(&id_priv->list);
3519                atomic_inc(&id_priv->refcount);
3520                mutex_unlock(&lock);
3521
3522                ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
3523                cma_deref_id(id_priv);
3524                if (ret)
3525                        rdma_destroy_id(&id_priv->id);
3526
3527                mutex_lock(&lock);
3528        }
3529        mutex_unlock(&lock);
3530
3531        cma_deref_dev(cma_dev);
3532        wait_for_completion(&cma_dev->comp);
3533}
3534
3535static void cma_remove_one(struct ib_device *device)
3536{
3537        struct cma_device *cma_dev;
3538
3539        cma_dev = ib_get_client_data(device, &cma_client);
3540        if (!cma_dev)
3541                return;
3542
3543        mutex_lock(&lock);
3544        list_del(&cma_dev->list);
3545        mutex_unlock(&lock);
3546
3547        cma_process_remove(cma_dev);
3548        kfree(cma_dev);
3549}
3550
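/*
 * Netlink dump callback for the RDMA_NL_RDMA_CM client on the NETLINK_RDMA
 * family: every cm_id of every cma_device is emitted as one
 * RDMA_NL_RDMA_CM_ID_STATS message with the source and destination
 * addresses attached as attributes.  cb->args[0] and cb->args[1] store the
 * device and id cursors so a dump that spans multiple skbs resumes where it
 * left off.
 */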
3551static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
3552{
3553        struct nlmsghdr *nlh;
3554        struct rdma_cm_id_stats *id_stats;
3555        struct rdma_id_private *id_priv;
3556        struct rdma_cm_id *id = NULL;
3557        struct cma_device *cma_dev;
3558        int i_dev = 0, i_id = 0;
3559
3560        /*
3561         * We export all of the IDs as a sequence of messages.  Each
3562         * ID gets its own netlink message.
3563         */
3564        mutex_lock(&lock);
3565
3566        list_for_each_entry(cma_dev, &dev_list, list) {
3567                if (i_dev < cb->args[0]) {
3568                        i_dev++;
3569                        continue;
3570                }
3571
3572                i_id = 0;
3573                list_for_each_entry(id_priv, &cma_dev->id_list, list) {
3574                        if (i_id < cb->args[1]) {
3575                                i_id++;
3576                                continue;
3577                        }
3578
3579                        id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
3580                                                sizeof *id_stats, RDMA_NL_RDMA_CM,
3581                                                RDMA_NL_RDMA_CM_ID_STATS);
3582                        if (!id_stats)
3583                                goto out;
3584
3585                        memset(id_stats, 0, sizeof *id_stats);
3586                        id = &id_priv->id;
3587                        id_stats->node_type = id->route.addr.dev_addr.dev_type;
3588                        id_stats->port_num = id->port_num;
3589                        id_stats->bound_dev_if =
3590                                id->route.addr.dev_addr.bound_dev_if;
3591
3592                        if (ibnl_put_attr(skb, nlh,
3593                                          rdma_addr_size(cma_src_addr(id_priv)),
3594                                          cma_src_addr(id_priv),
3595                                          RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
3596                                goto out;
3597                        if (ibnl_put_attr(skb, nlh,
3598                                          rdma_addr_size(cma_src_addr(id_priv)),
3599                                          cma_dst_addr(id_priv),
3600                                          RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
3601                                goto out;
3602
3603                        id_stats->pid           = id_priv->owner;
3604                        id_stats->port_space    = id->ps;
3605                        id_stats->cm_state      = id_priv->state;
3606                        id_stats->qp_num        = id_priv->qp_num;
3607                        id_stats->qp_type       = id->qp_type;
3608
3609                        i_id++;
3610                }
3611
3612                cb->args[1] = 0;
3613                i_dev++;
3614        }
3615
3616out:
3617        mutex_unlock(&lock);
3618        cb->args[0] = i_dev;
3619        cb->args[1] = i_id;
3620
3621        return skb->len;
3622}
3623
3624static const struct ibnl_client_cbs cma_cb_table[] = {
3625        [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats,
3626                                       .module = THIS_MODULE },
3627};
3628
3629static int __init cma_init(void)
3630{
3631        int ret;
3632
3633        cma_wq = create_singlethread_workqueue("rdma_cm");
3634        if (!cma_wq)
3635                return -ENOMEM;
3636
3637        ib_sa_register_client(&sa_client);
3638        rdma_addr_register_client(&addr_client);
3639        register_netdevice_notifier(&cma_nb);
3640
3641        ret = ib_register_client(&cma_client);
3642        if (ret)
3643                goto err;
3644
3645        if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
3646                printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
3647
3648        return 0;
3649
3650err:
3651        unregister_netdevice_notifier(&cma_nb);
3652        rdma_addr_unregister_client(&addr_client);
3653        ib_sa_unregister_client(&sa_client);
3654        destroy_workqueue(cma_wq);
3655        return ret;
3656}
3657
3658static void __exit cma_cleanup(void)
3659{
3660        ibnl_remove_client(RDMA_NL_RDMA_CM);
3661        ib_unregister_client(&cma_client);
3662        unregister_netdevice_notifier(&cma_nb);
3663        rdma_addr_unregister_client(&addr_client);
3664        ib_sa_unregister_client(&sa_client);
3665        destroy_workqueue(cma_wq);
3666        idr_destroy(&tcp_ps);
3667        idr_destroy(&udp_ps);
3668        idr_destroy(&ipoib_ps);
3669        idr_destroy(&ib_ps);
3670}
3671
3672module_init(cma_init);
3673module_exit(cma_cleanup);
3674