linux/drivers/infiniband/core/cma.c
   1/*
   2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
   3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
   4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
   5 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
   6 *
   7 * This software is available to you under a choice of one of two
   8 * licenses.  You may choose to be licensed under the terms of the GNU
   9 * General Public License (GPL) Version 2, available from the file
  10 * COPYING in the main directory of this source tree, or the
  11 * OpenIB.org BSD license below:
  12 *
  13 *     Redistribution and use in source and binary forms, with or
  14 *     without modification, are permitted provided that the following
  15 *     conditions are met:
  16 *
  17 *      - Redistributions of source code must retain the above
  18 *        copyright notice, this list of conditions and the following
  19 *        disclaimer.
  20 *
  21 *      - Redistributions in binary form must reproduce the above
  22 *        copyright notice, this list of conditions and the following
  23 *        disclaimer in the documentation and/or other materials
  24 *        provided with the distribution.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33 * SOFTWARE.
  34 */
  35
  36#include <linux/completion.h>
  37#include <linux/in.h>
  38#include <linux/in6.h>
  39#include <linux/mutex.h>
  40#include <linux/random.h>
  41#include <linux/igmp.h>
  42#include <linux/idr.h>
  43#include <linux/inetdevice.h>
  44#include <linux/slab.h>
  45#include <linux/module.h>
  46#include <net/route.h>
  47
  48#include <net/net_namespace.h>
  49#include <net/netns/generic.h>
  50#include <net/tcp.h>
  51#include <net/ipv6.h>
  52#include <net/ip_fib.h>
  53#include <net/ip6_route.h>
  54
  55#include <rdma/rdma_cm.h>
  56#include <rdma/rdma_cm_ib.h>
  57#include <rdma/rdma_netlink.h>
  58#include <rdma/ib.h>
  59#include <rdma/ib_cache.h>
  60#include <rdma/ib_cm.h>
  61#include <rdma/ib_sa.h>
  62#include <rdma/iw_cm.h>
  63
  64#include "core_priv.h"
  65
  66MODULE_AUTHOR("Sean Hefty");
  67MODULE_DESCRIPTION("Generic RDMA CM Agent");
  68MODULE_LICENSE("Dual BSD/GPL");
  69
  70#define CMA_CM_RESPONSE_TIMEOUT 20
  71#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
  72#define CMA_MAX_CM_RETRIES 15
  73#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
  74#define CMA_IBOE_PACKET_LIFETIME 18
  75#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
  76
  77static const char * const cma_events[] = {
  78        [RDMA_CM_EVENT_ADDR_RESOLVED]    = "address resolved",
  79        [RDMA_CM_EVENT_ADDR_ERROR]       = "address error",
   80        [RDMA_CM_EVENT_ROUTE_RESOLVED]   = "route resolved",
  81        [RDMA_CM_EVENT_ROUTE_ERROR]      = "route error",
  82        [RDMA_CM_EVENT_CONNECT_REQUEST]  = "connect request",
  83        [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
  84        [RDMA_CM_EVENT_CONNECT_ERROR]    = "connect error",
  85        [RDMA_CM_EVENT_UNREACHABLE]      = "unreachable",
  86        [RDMA_CM_EVENT_REJECTED]         = "rejected",
  87        [RDMA_CM_EVENT_ESTABLISHED]      = "established",
  88        [RDMA_CM_EVENT_DISCONNECTED]     = "disconnected",
  89        [RDMA_CM_EVENT_DEVICE_REMOVAL]   = "device removal",
  90        [RDMA_CM_EVENT_MULTICAST_JOIN]   = "multicast join",
  91        [RDMA_CM_EVENT_MULTICAST_ERROR]  = "multicast error",
  92        [RDMA_CM_EVENT_ADDR_CHANGE]      = "address change",
  93        [RDMA_CM_EVENT_TIMEWAIT_EXIT]    = "timewait exit",
  94};
  95
  96const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
  97{
  98        size_t index = event;
  99
 100        return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
 101                        cma_events[index] : "unrecognized event";
 102}
 103EXPORT_SYMBOL(rdma_event_msg);
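
/*
 * Editor's note (illustrative sketch, not part of the original file): a
 * minimal example of how a ULP event handler might use rdma_event_msg()
 * for logging; the handler name and its use of pr_info() are assumptions.
 *
 *	static int my_cm_handler(struct rdma_cm_id *id,
 *				 struct rdma_cm_event *event)
 *	{
 *		pr_info("cma event: %s, status %d\n",
 *			rdma_event_msg(event->event), event->status);
 *		return 0;
 *	}
 */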
 104
 105const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
 106                                                int reason)
 107{
 108        if (rdma_ib_or_roce(id->device, id->port_num))
 109                return ibcm_reject_msg(reason);
 110
 111        if (rdma_protocol_iwarp(id->device, id->port_num))
 112                return iwcm_reject_msg(reason);
 113
 114        WARN_ON_ONCE(1);
 115        return "unrecognized transport";
 116}
 117EXPORT_SYMBOL(rdma_reject_msg);
 118
 119bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
 120{
 121        if (rdma_ib_or_roce(id->device, id->port_num))
 122                return reason == IB_CM_REJ_CONSUMER_DEFINED;
 123
 124        if (rdma_protocol_iwarp(id->device, id->port_num))
 125                return reason == -ECONNREFUSED;
 126
 127        WARN_ON_ONCE(1);
 128        return false;
 129}
 130EXPORT_SYMBOL(rdma_is_consumer_reject);
 131
 132const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
 133                                      struct rdma_cm_event *ev, u8 *data_len)
 134{
 135        const void *p;
 136
 137        if (rdma_is_consumer_reject(id, ev->status)) {
 138                *data_len = ev->param.conn.private_data_len;
 139                p = ev->param.conn.private_data;
 140        } else {
 141                *data_len = 0;
 142                p = NULL;
 143        }
 144        return p;
 145}
 146EXPORT_SYMBOL(rdma_consumer_reject_data);
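
/*
 * Editor's note (illustrative sketch, not part of the original file): the
 * three helpers above are typically used together when handling
 * RDMA_CM_EVENT_REJECTED; 'id' and 'event' below are the arguments of an
 * assumed event handler.
 *
 *	u8 data_len;
 *	const void *data;
 *
 *	pr_info("rejected: %s\n", rdma_reject_msg(id, event->status));
 *	if (rdma_is_consumer_reject(id, event->status)) {
 *		data = rdma_consumer_reject_data(id, event, &data_len);
 *		// 'data' points at data_len bytes of peer private data
 *	}
 */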
 147
 148static void cma_add_one(struct ib_device *device);
 149static void cma_remove_one(struct ib_device *device, void *client_data);
 150
 151static struct ib_client cma_client = {
 152        .name   = "cma",
 153        .add    = cma_add_one,
 154        .remove = cma_remove_one
 155};
 156
 157static struct ib_sa_client sa_client;
 158static struct rdma_addr_client addr_client;
 159static LIST_HEAD(dev_list);
 160static LIST_HEAD(listen_any_list);
 161static DEFINE_MUTEX(lock);
 162static struct workqueue_struct *cma_wq;
 163static unsigned int cma_pernet_id;
 164
 165struct cma_pernet {
 166        struct idr tcp_ps;
 167        struct idr udp_ps;
 168        struct idr ipoib_ps;
 169        struct idr ib_ps;
 170};
 171
 172static struct cma_pernet *cma_pernet(struct net *net)
 173{
 174        return net_generic(net, cma_pernet_id);
 175}
 176
 177static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
 178{
 179        struct cma_pernet *pernet = cma_pernet(net);
 180
 181        switch (ps) {
 182        case RDMA_PS_TCP:
 183                return &pernet->tcp_ps;
 184        case RDMA_PS_UDP:
 185                return &pernet->udp_ps;
 186        case RDMA_PS_IPOIB:
 187                return &pernet->ipoib_ps;
 188        case RDMA_PS_IB:
 189                return &pernet->ib_ps;
 190        default:
 191                return NULL;
 192        }
 193}
 194
 195struct cma_device {
 196        struct list_head        list;
 197        struct ib_device        *device;
 198        struct completion       comp;
 199        atomic_t                refcount;
 200        struct list_head        id_list;
 201        enum ib_gid_type        *default_gid_type;
 202        u8                      *default_roce_tos;
 203};
 204
 205struct rdma_bind_list {
 206        enum rdma_port_space    ps;
 207        struct hlist_head       owners;
 208        unsigned short          port;
 209};
 210
 211struct class_port_info_context {
 212        struct ib_class_port_info       *class_port_info;
 213        struct ib_device                *device;
 214        struct completion               done;
 215        struct ib_sa_query              *sa_query;
 216        u8                              port_num;
 217};
 218
 219static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
 220                        struct rdma_bind_list *bind_list, int snum)
 221{
 222        struct idr *idr = cma_pernet_idr(net, ps);
 223
 224        return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
 225}
 226
 227static struct rdma_bind_list *cma_ps_find(struct net *net,
 228                                          enum rdma_port_space ps, int snum)
 229{
 230        struct idr *idr = cma_pernet_idr(net, ps);
 231
 232        return idr_find(idr, snum);
 233}
 234
 235static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
 236{
 237        struct idr *idr = cma_pernet_idr(net, ps);
 238
 239        idr_remove(idr, snum);
 240}
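
/*
 * Editor's note (illustrative, not part of the original file): the three
 * helpers above wrap the per-namespace port-space IDRs. Roughly, a port is
 * claimed at bind time, looked up when a request arrives, and released when
 * the last owner goes away; port 1024 below is just an example value.
 *
 *	ret = cma_ps_alloc(net, RDMA_PS_TCP, bind_list, 1024);
 *	bind_list = cma_ps_find(net, RDMA_PS_TCP, 1024);
 *	cma_ps_remove(net, RDMA_PS_TCP, 1024);
 */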
 241
 242enum {
 243        CMA_OPTION_AFONLY,
 244};
 245
 246void cma_ref_dev(struct cma_device *cma_dev)
 247{
 248        atomic_inc(&cma_dev->refcount);
 249}
 250
 251struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter  filter,
 252                                             void               *cookie)
 253{
 254        struct cma_device *cma_dev;
 255        struct cma_device *found_cma_dev = NULL;
 256
 257        mutex_lock(&lock);
 258
 259        list_for_each_entry(cma_dev, &dev_list, list)
 260                if (filter(cma_dev->device, cookie)) {
 261                        found_cma_dev = cma_dev;
 262                        break;
 263                }
 264
 265        if (found_cma_dev)
 266                cma_ref_dev(found_cma_dev);
 267        mutex_unlock(&lock);
 268        return found_cma_dev;
 269}
 270
 271int cma_get_default_gid_type(struct cma_device *cma_dev,
 272                             unsigned int port)
 273{
 274        if (!rdma_is_port_valid(cma_dev->device, port))
 275                return -EINVAL;
 276
 277        return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
 278}
 279
 280int cma_set_default_gid_type(struct cma_device *cma_dev,
 281                             unsigned int port,
 282                             enum ib_gid_type default_gid_type)
 283{
 284        unsigned long supported_gids;
 285
 286        if (!rdma_is_port_valid(cma_dev->device, port))
 287                return -EINVAL;
 288
 289        supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
 290
 291        if (!(supported_gids & 1 << default_gid_type))
 292                return -EINVAL;
 293
 294        cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
 295                default_gid_type;
 296
 297        return 0;
 298}
 299
 300int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
 301{
 302        if (!rdma_is_port_valid(cma_dev->device, port))
 303                return -EINVAL;
 304
 305        return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
 306}
 307
 308int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
 309                             u8 default_roce_tos)
 310{
 311        if (!rdma_is_port_valid(cma_dev->device, port))
 312                return -EINVAL;
 313
 314        cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
 315                 default_roce_tos;
 316
 317        return 0;
 318}
 319struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
 320{
 321        return cma_dev->device;
 322}
 323
 324/*
  325 * Device removal can occur at any time, so we need extra handling to
 326 * serialize notifying the user of device removal with other callbacks.
  327 * We do this by disabling removal notification while a callback is in progress,
 328 * and reporting it after the callback completes.
 329 */
 330struct rdma_id_private {
 331        struct rdma_cm_id       id;
 332
 333        struct rdma_bind_list   *bind_list;
 334        struct hlist_node       node;
 335        struct list_head        list; /* listen_any_list or cma_device.list */
 336        struct list_head        listen_list; /* per device listens */
 337        struct cma_device       *cma_dev;
 338        struct list_head        mc_list;
 339
 340        int                     internal_id;
 341        enum rdma_cm_state      state;
 342        spinlock_t              lock;
 343        struct mutex            qp_mutex;
 344
 345        struct completion       comp;
 346        atomic_t                refcount;
 347        struct mutex            handler_mutex;
 348
 349        int                     backlog;
 350        int                     timeout_ms;
 351        struct ib_sa_query      *query;
 352        int                     query_id;
 353        union {
 354                struct ib_cm_id *ib;
 355                struct iw_cm_id *iw;
 356        } cm_id;
 357
 358        u32                     seq_num;
 359        u32                     qkey;
 360        u32                     qp_num;
 361        pid_t                   owner;
 362        u32                     options;
 363        u8                      srq;
 364        u8                      tos;
 365        bool                    tos_set;
 366        u8                      reuseaddr;
 367        u8                      afonly;
 368        enum ib_gid_type        gid_type;
 369};
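
/*
 * Editor's note (illustrative sketch, not part of the original file): the
 * serialization described in the comment above typically takes this shape
 * inside a CM callback; the specific state checked is just an example.
 *
 *	mutex_lock(&id_priv->handler_mutex);
 *	if (id_priv->state != RDMA_CM_CONNECT)
 *		goto out;	/* teardown or removal already in progress */
 *	ret = id_priv->id.event_handler(&id_priv->id, &event);
 * out:
 *	mutex_unlock(&id_priv->handler_mutex);
 */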
 370
 371struct cma_multicast {
 372        struct rdma_id_private *id_priv;
 373        union {
 374                struct ib_sa_multicast *ib;
 375        } multicast;
 376        struct list_head        list;
 377        void                    *context;
 378        struct sockaddr_storage addr;
 379        struct kref             mcref;
 380        bool                    igmp_joined;
 381        u8                      join_state;
 382};
 383
 384struct cma_work {
 385        struct work_struct      work;
 386        struct rdma_id_private  *id;
 387        enum rdma_cm_state      old_state;
 388        enum rdma_cm_state      new_state;
 389        struct rdma_cm_event    event;
 390};
 391
 392struct cma_ndev_work {
 393        struct work_struct      work;
 394        struct rdma_id_private  *id;
 395        struct rdma_cm_event    event;
 396};
 397
 398struct iboe_mcast_work {
 399        struct work_struct       work;
 400        struct rdma_id_private  *id;
 401        struct cma_multicast    *mc;
 402};
 403
 404union cma_ip_addr {
 405        struct in6_addr ip6;
 406        struct {
 407                __be32 pad[3];
 408                __be32 addr;
 409        } ip4;
 410};
 411
 412struct cma_hdr {
 413        u8 cma_version;
  414        u8 ip_version;  /* IP version in bits 7:4 */
 415        __be16 port;
 416        union cma_ip_addr src_addr;
 417        union cma_ip_addr dst_addr;
 418};
 419
 420#define CMA_VERSION 0x00
 421
 422struct cma_req_info {
 423        struct ib_device *device;
 424        int port;
 425        union ib_gid local_gid;
 426        __be64 service_id;
 427        u16 pkey;
 428        bool has_gid:1;
 429};
 430
 431static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
 432{
 433        unsigned long flags;
 434        int ret;
 435
 436        spin_lock_irqsave(&id_priv->lock, flags);
 437        ret = (id_priv->state == comp);
 438        spin_unlock_irqrestore(&id_priv->lock, flags);
 439        return ret;
 440}
 441
 442static int cma_comp_exch(struct rdma_id_private *id_priv,
 443                         enum rdma_cm_state comp, enum rdma_cm_state exch)
 444{
 445        unsigned long flags;
 446        int ret;
 447
 448        spin_lock_irqsave(&id_priv->lock, flags);
 449        if ((ret = (id_priv->state == comp)))
 450                id_priv->state = exch;
 451        spin_unlock_irqrestore(&id_priv->lock, flags);
 452        return ret;
 453}
 454
 455static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
 456                                   enum rdma_cm_state exch)
 457{
 458        unsigned long flags;
 459        enum rdma_cm_state old;
 460
 461        spin_lock_irqsave(&id_priv->lock, flags);
 462        old = id_priv->state;
 463        id_priv->state = exch;
 464        spin_unlock_irqrestore(&id_priv->lock, flags);
 465        return old;
 466}
 467
 468static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
 469{
 470        return hdr->ip_version >> 4;
 471}
 472
 473static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
 474{
 475        hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 476}
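
/*
 * Editor's note (illustrative, not part of the original file): the helpers
 * above keep the IP version in the high nibble of cma_hdr.ip_version, so
 * cma_set_ip_ver(&hdr, 4) leaves ip_version == 0x40 and cma_get_ip_ver()
 * then returns 4. A sketch of filling the header for an IPv4 peer ('sin'
 * is an assumed struct sockaddr_in pointer):
 *
 *	struct cma_hdr hdr = { .cma_version = CMA_VERSION };
 *
 *	cma_set_ip_ver(&hdr, 4);
 *	hdr.port = sin->sin_port;
 *	hdr.dst_addr.ip4.addr = sin->sin_addr.s_addr;
 */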
 477
 478static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
 479{
 480        struct in_device *in_dev = NULL;
 481
 482        if (ndev) {
 483                rtnl_lock();
 484                in_dev = __in_dev_get_rtnl(ndev);
 485                if (in_dev) {
 486                        if (join)
 487                                ip_mc_inc_group(in_dev,
 488                                                *(__be32 *)(mgid->raw + 12));
 489                        else
 490                                ip_mc_dec_group(in_dev,
 491                                                *(__be32 *)(mgid->raw + 12));
 492                }
 493                rtnl_unlock();
 494        }
 495        return (in_dev) ? 0 : -ENODEV;
 496}
 497
 498static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
 499                               struct cma_device *cma_dev)
 500{
 501        cma_ref_dev(cma_dev);
 502        id_priv->cma_dev = cma_dev;
 503        id_priv->gid_type = 0;
 504        id_priv->id.device = cma_dev->device;
 505        id_priv->id.route.addr.dev_addr.transport =
 506                rdma_node_get_transport(cma_dev->device->node_type);
 507        list_add_tail(&id_priv->list, &cma_dev->id_list);
 508}
 509
 510static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 511                              struct cma_device *cma_dev)
 512{
 513        _cma_attach_to_dev(id_priv, cma_dev);
 514        id_priv->gid_type =
 515                cma_dev->default_gid_type[id_priv->id.port_num -
 516                                          rdma_start_port(cma_dev->device)];
 517}
 518
 519void cma_deref_dev(struct cma_device *cma_dev)
 520{
 521        if (atomic_dec_and_test(&cma_dev->refcount))
 522                complete(&cma_dev->comp);
 523}
 524
 525static inline void release_mc(struct kref *kref)
 526{
 527        struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
 528
 529        kfree(mc->multicast.ib);
 530        kfree(mc);
 531}
 532
 533static void cma_release_dev(struct rdma_id_private *id_priv)
 534{
 535        mutex_lock(&lock);
 536        list_del(&id_priv->list);
 537        cma_deref_dev(id_priv->cma_dev);
 538        id_priv->cma_dev = NULL;
 539        mutex_unlock(&lock);
 540}
 541
 542static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
 543{
 544        return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
 545}
 546
 547static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
 548{
 549        return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
 550}
 551
 552static inline unsigned short cma_family(struct rdma_id_private *id_priv)
 553{
 554        return id_priv->id.route.addr.src_addr.ss_family;
 555}
 556
 557static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
 558{
 559        struct ib_sa_mcmember_rec rec;
 560        int ret = 0;
 561
 562        if (id_priv->qkey) {
 563                if (qkey && id_priv->qkey != qkey)
 564                        return -EINVAL;
 565                return 0;
 566        }
 567
 568        if (qkey) {
 569                id_priv->qkey = qkey;
 570                return 0;
 571        }
 572
 573        switch (id_priv->id.ps) {
 574        case RDMA_PS_UDP:
 575        case RDMA_PS_IB:
 576                id_priv->qkey = RDMA_UDP_QKEY;
 577                break;
 578        case RDMA_PS_IPOIB:
 579                ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
 580                ret = ib_sa_get_mcmember_rec(id_priv->id.device,
 581                                             id_priv->id.port_num, &rec.mgid,
 582                                             &rec);
 583                if (!ret)
 584                        id_priv->qkey = be32_to_cpu(rec.qkey);
 585                break;
 586        default:
 587                break;
 588        }
 589        return ret;
 590}
 591
 592static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
 593{
 594        dev_addr->dev_type = ARPHRD_INFINIBAND;
 595        rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
 596        ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
 597}
 598
 599static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 600{
 601        int ret;
 602
 603        if (addr->sa_family != AF_IB) {
 604                ret = rdma_translate_ip(addr, dev_addr, NULL);
 605        } else {
 606                cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
 607                ret = 0;
 608        }
 609
 610        return ret;
 611}
 612
 613static inline int cma_validate_port(struct ib_device *device, u8 port,
 614                                    enum ib_gid_type gid_type,
 615                                      union ib_gid *gid, int dev_type,
 616                                      int bound_if_index)
 617{
 618        int ret = -ENODEV;
 619        struct net_device *ndev = NULL;
 620
 621        if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
 622                return ret;
 623
 624        if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
 625                return ret;
 626
 627        if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port))
 628                ndev = dev_get_by_index(&init_net, bound_if_index);
 629        else
 630                gid_type = IB_GID_TYPE_IB;
 631
 632
 633        ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
 634                                         ndev, NULL);
 635
 636        if (ndev)
 637                dev_put(ndev);
 638
 639        return ret;
 640}
 641
 642static int cma_acquire_dev(struct rdma_id_private *id_priv,
 643                           struct rdma_id_private *listen_id_priv)
 644{
 645        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 646        struct cma_device *cma_dev;
 647        union ib_gid gid, iboe_gid, *gidp;
 648        int ret = -ENODEV;
 649        u8 port;
 650
 651        if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
 652            id_priv->id.ps == RDMA_PS_IPOIB)
 653                return -EINVAL;
 654
 655        mutex_lock(&lock);
 656        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
 657                    &iboe_gid);
 658
 659        memcpy(&gid, dev_addr->src_dev_addr +
 660               rdma_addr_gid_offset(dev_addr), sizeof gid);
 661
 662        if (listen_id_priv) {
 663                cma_dev = listen_id_priv->cma_dev;
 664                port = listen_id_priv->id.port_num;
 665                gidp = rdma_protocol_roce(cma_dev->device, port) ?
 666                       &iboe_gid : &gid;
 667
 668                ret = cma_validate_port(cma_dev->device, port,
 669                                        rdma_protocol_ib(cma_dev->device, port) ?
 670                                        IB_GID_TYPE_IB :
 671                                        listen_id_priv->gid_type, gidp,
 672                                        dev_addr->dev_type,
 673                                        dev_addr->bound_dev_if);
 674                if (!ret) {
 675                        id_priv->id.port_num = port;
 676                        goto out;
 677                }
 678        }
 679
 680        list_for_each_entry(cma_dev, &dev_list, list) {
 681                for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
 682                        if (listen_id_priv &&
 683                            listen_id_priv->cma_dev == cma_dev &&
 684                            listen_id_priv->id.port_num == port)
 685                                continue;
 686
 687                        gidp = rdma_protocol_roce(cma_dev->device, port) ?
 688                               &iboe_gid : &gid;
 689
 690                        ret = cma_validate_port(cma_dev->device, port,
 691                                                rdma_protocol_ib(cma_dev->device, port) ?
 692                                                IB_GID_TYPE_IB :
 693                                                cma_dev->default_gid_type[port - 1],
 694                                                gidp, dev_addr->dev_type,
 695                                                dev_addr->bound_dev_if);
 696                        if (!ret) {
 697                                id_priv->id.port_num = port;
 698                                goto out;
 699                        }
 700                }
 701        }
 702
 703out:
 704        if (!ret)
 705                cma_attach_to_dev(id_priv, cma_dev);
 706
 707        mutex_unlock(&lock);
 708        return ret;
 709}
 710
 711/*
 712 * Select the source IB device and address to reach the destination IB address.
 713 */
 714static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
 715{
 716        struct cma_device *cma_dev, *cur_dev;
 717        struct sockaddr_ib *addr;
 718        union ib_gid gid, sgid, *dgid;
 719        u16 pkey, index;
 720        u8 p;
 721        enum ib_port_state port_state;
 722        int i;
 723
 724        cma_dev = NULL;
 725        addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
 726        dgid = (union ib_gid *) &addr->sib_addr;
 727        pkey = ntohs(addr->sib_pkey);
 728
 729        list_for_each_entry(cur_dev, &dev_list, list) {
 730                for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
 731                        if (!rdma_cap_af_ib(cur_dev->device, p))
 732                                continue;
 733
 734                        if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
 735                                continue;
 736
 737                        if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
 738                                continue;
 739                        for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
 740                                                       &gid, NULL);
 741                             i++) {
 742                                if (!memcmp(&gid, dgid, sizeof(gid))) {
 743                                        cma_dev = cur_dev;
 744                                        sgid = gid;
 745                                        id_priv->id.port_num = p;
 746                                        goto found;
 747                                }
 748
 749                                if (!cma_dev && (gid.global.subnet_prefix ==
 750                                    dgid->global.subnet_prefix) &&
 751                                    port_state == IB_PORT_ACTIVE) {
 752                                        cma_dev = cur_dev;
 753                                        sgid = gid;
 754                                        id_priv->id.port_num = p;
 755                                }
 756                        }
 757                }
 758        }
 759
 760        if (!cma_dev)
 761                return -ENODEV;
 762
 763found:
 764        cma_attach_to_dev(id_priv, cma_dev);
 765        addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
 766        memcpy(&addr->sib_addr, &sgid, sizeof sgid);
 767        cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
 768        return 0;
 769}
 770
 771static void cma_deref_id(struct rdma_id_private *id_priv)
 772{
 773        if (atomic_dec_and_test(&id_priv->refcount))
 774                complete(&id_priv->comp);
 775}
 776
 777struct rdma_cm_id *rdma_create_id(struct net *net,
 778                                  rdma_cm_event_handler event_handler,
 779                                  void *context, enum rdma_port_space ps,
 780                                  enum ib_qp_type qp_type)
 781{
 782        struct rdma_id_private *id_priv;
 783
 784        id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
 785        if (!id_priv)
 786                return ERR_PTR(-ENOMEM);
 787
 788        id_priv->owner = task_pid_nr(current);
 789        id_priv->state = RDMA_CM_IDLE;
 790        id_priv->id.context = context;
 791        id_priv->id.event_handler = event_handler;
 792        id_priv->id.ps = ps;
 793        id_priv->id.qp_type = qp_type;
 794        id_priv->tos_set = false;
 795        spin_lock_init(&id_priv->lock);
 796        mutex_init(&id_priv->qp_mutex);
 797        init_completion(&id_priv->comp);
 798        atomic_set(&id_priv->refcount, 1);
 799        mutex_init(&id_priv->handler_mutex);
 800        INIT_LIST_HEAD(&id_priv->listen_list);
 801        INIT_LIST_HEAD(&id_priv->mc_list);
 802        get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
 803        id_priv->id.route.addr.dev_addr.net = get_net(net);
 804
 805        return &id_priv->id;
 806}
 807EXPORT_SYMBOL(rdma_create_id);
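
/*
 * Editor's note (illustrative sketch, not part of the original file): a
 * minimal rdma_create_id() call as a ULP might issue it; my_cm_handler and
 * my_ctx are assumed to exist, and the port space / QP type are examples.
 *
 *	struct rdma_cm_id *id;
 *
 *	id = rdma_create_id(&init_net, my_cm_handler, my_ctx,
 *			    RDMA_PS_TCP, IB_QPT_RC);
 *	if (IS_ERR(id))
 *		return PTR_ERR(id);
 */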
 808
 809static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 810{
 811        struct ib_qp_attr qp_attr;
 812        int qp_attr_mask, ret;
 813
 814        qp_attr.qp_state = IB_QPS_INIT;
 815        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 816        if (ret)
 817                return ret;
 818
 819        ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 820        if (ret)
 821                return ret;
 822
 823        qp_attr.qp_state = IB_QPS_RTR;
 824        ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
 825        if (ret)
 826                return ret;
 827
 828        qp_attr.qp_state = IB_QPS_RTS;
 829        qp_attr.sq_psn = 0;
 830        ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
 831
 832        return ret;
 833}
 834
 835static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 836{
 837        struct ib_qp_attr qp_attr;
 838        int qp_attr_mask, ret;
 839
 840        qp_attr.qp_state = IB_QPS_INIT;
 841        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 842        if (ret)
 843                return ret;
 844
 845        return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 846}
 847
 848int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 849                   struct ib_qp_init_attr *qp_init_attr)
 850{
 851        struct rdma_id_private *id_priv;
 852        struct ib_qp *qp;
 853        int ret;
 854
 855        id_priv = container_of(id, struct rdma_id_private, id);
 856        if (id->device != pd->device)
 857                return -EINVAL;
 858
 859        qp_init_attr->port_num = id->port_num;
 860        qp = ib_create_qp(pd, qp_init_attr);
 861        if (IS_ERR(qp))
 862                return PTR_ERR(qp);
 863
 864        if (id->qp_type == IB_QPT_UD)
 865                ret = cma_init_ud_qp(id_priv, qp);
 866        else
 867                ret = cma_init_conn_qp(id_priv, qp);
 868        if (ret)
 869                goto err;
 870
 871        id->qp = qp;
 872        id_priv->qp_num = qp->qp_num;
 873        id_priv->srq = (qp->srq != NULL);
 874        return 0;
 875err:
 876        ib_destroy_qp(qp);
 877        return ret;
 878}
 879EXPORT_SYMBOL(rdma_create_qp);
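
/*
 * Editor's note (illustrative sketch, not part of the original file): a
 * typical rdma_create_qp() call after the route has been resolved; 'cq' and
 * 'pd' are assumed to have been allocated on id->device, and the queue
 * sizes are example values.
 *
 *	struct ib_qp_init_attr attr = {
 *		.send_cq = cq,
 *		.recv_cq = cq,
 *		.cap = { .max_send_wr = 16, .max_recv_wr = 16,
 *			 .max_send_sge = 1, .max_recv_sge = 1 },
 *		.qp_type = IB_QPT_RC,
 *		.sq_sig_type = IB_SIGNAL_REQ_WR,
 *	};
 *
 *	ret = rdma_create_qp(id, pd, &attr);
 */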
 880
 881void rdma_destroy_qp(struct rdma_cm_id *id)
 882{
 883        struct rdma_id_private *id_priv;
 884
 885        id_priv = container_of(id, struct rdma_id_private, id);
 886        mutex_lock(&id_priv->qp_mutex);
 887        ib_destroy_qp(id_priv->id.qp);
 888        id_priv->id.qp = NULL;
 889        mutex_unlock(&id_priv->qp_mutex);
 890}
 891EXPORT_SYMBOL(rdma_destroy_qp);
 892
 893static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
 894                             struct rdma_conn_param *conn_param)
 895{
 896        struct ib_qp_attr qp_attr;
 897        int qp_attr_mask, ret;
 898        union ib_gid sgid;
 899
 900        mutex_lock(&id_priv->qp_mutex);
 901        if (!id_priv->id.qp) {
 902                ret = 0;
 903                goto out;
 904        }
 905
 906        /* Need to update QP attributes from default values. */
 907        qp_attr.qp_state = IB_QPS_INIT;
 908        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 909        if (ret)
 910                goto out;
 911
 912        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 913        if (ret)
 914                goto out;
 915
 916        qp_attr.qp_state = IB_QPS_RTR;
 917        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 918        if (ret)
 919                goto out;
 920
 921        ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
 922                           rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index,
 923                           &sgid, NULL);
 924        if (ret)
 925                goto out;
 926
 927        BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
 928
 929        if (conn_param)
 930                qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
 931        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 932out:
 933        mutex_unlock(&id_priv->qp_mutex);
 934        return ret;
 935}
 936
 937static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
 938                             struct rdma_conn_param *conn_param)
 939{
 940        struct ib_qp_attr qp_attr;
 941        int qp_attr_mask, ret;
 942
 943        mutex_lock(&id_priv->qp_mutex);
 944        if (!id_priv->id.qp) {
 945                ret = 0;
 946                goto out;
 947        }
 948
 949        qp_attr.qp_state = IB_QPS_RTS;
 950        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 951        if (ret)
 952                goto out;
 953
 954        if (conn_param)
 955                qp_attr.max_rd_atomic = conn_param->initiator_depth;
 956        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
 957out:
 958        mutex_unlock(&id_priv->qp_mutex);
 959        return ret;
 960}
 961
 962static int cma_modify_qp_err(struct rdma_id_private *id_priv)
 963{
 964        struct ib_qp_attr qp_attr;
 965        int ret;
 966
 967        mutex_lock(&id_priv->qp_mutex);
 968        if (!id_priv->id.qp) {
 969                ret = 0;
 970                goto out;
 971        }
 972
 973        qp_attr.qp_state = IB_QPS_ERR;
 974        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
 975out:
 976        mutex_unlock(&id_priv->qp_mutex);
 977        return ret;
 978}
 979
 980static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
 981                               struct ib_qp_attr *qp_attr, int *qp_attr_mask)
 982{
 983        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 984        int ret;
 985        u16 pkey;
 986
 987        if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
 988                pkey = 0xffff;
 989        else
 990                pkey = ib_addr_get_pkey(dev_addr);
 991
 992        ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
 993                                  pkey, &qp_attr->pkey_index);
 994        if (ret)
 995                return ret;
 996
 997        qp_attr->port_num = id_priv->id.port_num;
 998        *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
 999
1000        if (id_priv->id.qp_type == IB_QPT_UD) {
1001                ret = cma_set_qkey(id_priv, 0);
1002                if (ret)
1003                        return ret;
1004
1005                qp_attr->qkey = id_priv->qkey;
1006                *qp_attr_mask |= IB_QP_QKEY;
1007        } else {
1008                qp_attr->qp_access_flags = 0;
1009                *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
1010        }
1011        return 0;
1012}
1013
1014int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
1015                       int *qp_attr_mask)
1016{
1017        struct rdma_id_private *id_priv;
1018        int ret = 0;
1019
1020        id_priv = container_of(id, struct rdma_id_private, id);
1021        if (rdma_cap_ib_cm(id->device, id->port_num)) {
1022                if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
1023                        ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
1024                else
1025                        ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
1026                                                 qp_attr_mask);
1027
1028                if (qp_attr->qp_state == IB_QPS_RTR)
1029                        qp_attr->rq_psn = id_priv->seq_num;
1030        } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
1031                if (!id_priv->cm_id.iw) {
1032                        qp_attr->qp_access_flags = 0;
1033                        *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
1034                } else
1035                        ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
1036                                                 qp_attr_mask);
1037                qp_attr->port_num = id_priv->id.port_num;
1038                *qp_attr_mask |= IB_QP_PORT;
1039        } else
1040                ret = -ENOSYS;
1041
1042        return ret;
1043}
1044EXPORT_SYMBOL(rdma_init_qp_attr);
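
/*
 * Editor's note (illustrative sketch, not part of the original file): ULPs
 * that create their own QP (instead of calling rdma_create_qp()) can use
 * rdma_init_qp_attr() to fetch the attributes for each state transition,
 * e.g. to move an assumed 'qp' to RTR:
 *
 *	struct ib_qp_attr qp_attr = { .qp_state = IB_QPS_RTR };
 *	int qp_attr_mask;
 *
 *	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
 *	if (!ret)
 *		ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 */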
1045
1046static inline int cma_zero_addr(struct sockaddr *addr)
1047{
1048        switch (addr->sa_family) {
1049        case AF_INET:
1050                return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
1051        case AF_INET6:
1052                return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
1053        case AF_IB:
1054                return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
1055        default:
1056                return 0;
1057        }
1058}
1059
1060static inline int cma_loopback_addr(struct sockaddr *addr)
1061{
1062        switch (addr->sa_family) {
1063        case AF_INET:
1064                return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
1065        case AF_INET6:
1066                return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
1067        case AF_IB:
1068                return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
1069        default:
1070                return 0;
1071        }
1072}
1073
1074static inline int cma_any_addr(struct sockaddr *addr)
1075{
1076        return cma_zero_addr(addr) || cma_loopback_addr(addr);
1077}
1078
1079static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
1080{
1081        if (src->sa_family != dst->sa_family)
1082                return -1;
1083
1084        switch (src->sa_family) {
1085        case AF_INET:
1086                return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
1087                       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
1088        case AF_INET6:
1089                return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
1090                                     &((struct sockaddr_in6 *) dst)->sin6_addr);
1091        default:
1092                return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
1093                                   &((struct sockaddr_ib *) dst)->sib_addr);
1094        }
1095}
1096
1097static __be16 cma_port(struct sockaddr *addr)
1098{
1099        struct sockaddr_ib *sib;
1100
1101        switch (addr->sa_family) {
1102        case AF_INET:
1103                return ((struct sockaddr_in *) addr)->sin_port;
1104        case AF_INET6:
1105                return ((struct sockaddr_in6 *) addr)->sin6_port;
1106        case AF_IB:
1107                sib = (struct sockaddr_ib *) addr;
1108                return htons((u16) (be64_to_cpu(sib->sib_sid) &
1109                                    be64_to_cpu(sib->sib_sid_mask)));
1110        default:
1111                return 0;
1112        }
1113}
1114
1115static inline int cma_any_port(struct sockaddr *addr)
1116{
1117        return !cma_port(addr);
1118}
1119
1120static void cma_save_ib_info(struct sockaddr *src_addr,
1121                             struct sockaddr *dst_addr,
1122                             struct rdma_cm_id *listen_id,
1123                             struct sa_path_rec *path)
1124{
1125        struct sockaddr_ib *listen_ib, *ib;
1126
1127        listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
1128        if (src_addr) {
1129                ib = (struct sockaddr_ib *)src_addr;
1130                ib->sib_family = AF_IB;
1131                if (path) {
1132                        ib->sib_pkey = path->pkey;
1133                        ib->sib_flowinfo = path->flow_label;
1134                        memcpy(&ib->sib_addr, &path->sgid, 16);
1135                        ib->sib_sid = path->service_id;
1136                        ib->sib_scope_id = 0;
1137                } else {
1138                        ib->sib_pkey = listen_ib->sib_pkey;
1139                        ib->sib_flowinfo = listen_ib->sib_flowinfo;
1140                        ib->sib_addr = listen_ib->sib_addr;
1141                        ib->sib_sid = listen_ib->sib_sid;
1142                        ib->sib_scope_id = listen_ib->sib_scope_id;
1143                }
1144                ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
1145        }
1146        if (dst_addr) {
1147                ib = (struct sockaddr_ib *)dst_addr;
1148                ib->sib_family = AF_IB;
1149                if (path) {
1150                        ib->sib_pkey = path->pkey;
1151                        ib->sib_flowinfo = path->flow_label;
1152                        memcpy(&ib->sib_addr, &path->dgid, 16);
1153                }
1154        }
1155}
1156
1157static void cma_save_ip4_info(struct sockaddr_in *src_addr,
1158                              struct sockaddr_in *dst_addr,
1159                              struct cma_hdr *hdr,
1160                              __be16 local_port)
1161{
1162        if (src_addr) {
1163                *src_addr = (struct sockaddr_in) {
1164                        .sin_family = AF_INET,
1165                        .sin_addr.s_addr = hdr->dst_addr.ip4.addr,
1166                        .sin_port = local_port,
1167                };
1168        }
1169
1170        if (dst_addr) {
1171                *dst_addr = (struct sockaddr_in) {
1172                        .sin_family = AF_INET,
1173                        .sin_addr.s_addr = hdr->src_addr.ip4.addr,
1174                        .sin_port = hdr->port,
1175                };
1176        }
1177}
1178
1179static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
1180                              struct sockaddr_in6 *dst_addr,
1181                              struct cma_hdr *hdr,
1182                              __be16 local_port)
1183{
1184        if (src_addr) {
1185                *src_addr = (struct sockaddr_in6) {
1186                        .sin6_family = AF_INET6,
1187                        .sin6_addr = hdr->dst_addr.ip6,
1188                        .sin6_port = local_port,
1189                };
1190        }
1191
1192        if (dst_addr) {
1193                *dst_addr = (struct sockaddr_in6) {
1194                        .sin6_family = AF_INET6,
1195                        .sin6_addr = hdr->src_addr.ip6,
1196                        .sin6_port = hdr->port,
1197                };
1198        }
1199}
1200
1201static u16 cma_port_from_service_id(__be64 service_id)
1202{
1203        return (u16)be64_to_cpu(service_id);
1204}
1205
1206static int cma_save_ip_info(struct sockaddr *src_addr,
1207                            struct sockaddr *dst_addr,
1208                            struct ib_cm_event *ib_event,
1209                            __be64 service_id)
1210{
1211        struct cma_hdr *hdr;
1212        __be16 port;
1213
1214        hdr = ib_event->private_data;
1215        if (hdr->cma_version != CMA_VERSION)
1216                return -EINVAL;
1217
1218        port = htons(cma_port_from_service_id(service_id));
1219
1220        switch (cma_get_ip_ver(hdr)) {
1221        case 4:
1222                cma_save_ip4_info((struct sockaddr_in *)src_addr,
1223                                  (struct sockaddr_in *)dst_addr, hdr, port);
1224                break;
1225        case 6:
1226                cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
1227                                  (struct sockaddr_in6 *)dst_addr, hdr, port);
1228                break;
1229        default:
1230                return -EAFNOSUPPORT;
1231        }
1232
1233        return 0;
1234}
1235
1236static int cma_save_net_info(struct sockaddr *src_addr,
1237                             struct sockaddr *dst_addr,
1238                             struct rdma_cm_id *listen_id,
1239                             struct ib_cm_event *ib_event,
1240                             sa_family_t sa_family, __be64 service_id)
1241{
1242        if (sa_family == AF_IB) {
1243                if (ib_event->event == IB_CM_REQ_RECEIVED)
1244                        cma_save_ib_info(src_addr, dst_addr, listen_id,
1245                                         ib_event->param.req_rcvd.primary_path);
1246                else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
1247                        cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
1248                return 0;
1249        }
1250
1251        return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
1252}
1253
1254static int cma_save_req_info(const struct ib_cm_event *ib_event,
1255                             struct cma_req_info *req)
1256{
1257        const struct ib_cm_req_event_param *req_param =
1258                &ib_event->param.req_rcvd;
1259        const struct ib_cm_sidr_req_event_param *sidr_param =
1260                &ib_event->param.sidr_req_rcvd;
1261
1262        switch (ib_event->event) {
1263        case IB_CM_REQ_RECEIVED:
1264                req->device     = req_param->listen_id->device;
1265                req->port       = req_param->port;
1266                memcpy(&req->local_gid, &req_param->primary_path->sgid,
1267                       sizeof(req->local_gid));
1268                req->has_gid    = true;
1269                req->service_id = req_param->primary_path->service_id;
1270                req->pkey       = be16_to_cpu(req_param->primary_path->pkey);
1271                if (req->pkey != req_param->bth_pkey)
1272                        pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
1273                                            "RDMA CMA: in the future this may cause the request to be dropped\n",
1274                                            req_param->bth_pkey, req->pkey);
1275                break;
1276        case IB_CM_SIDR_REQ_RECEIVED:
1277                req->device     = sidr_param->listen_id->device;
1278                req->port       = sidr_param->port;
1279                req->has_gid    = false;
1280                req->service_id = sidr_param->service_id;
1281                req->pkey       = sidr_param->pkey;
1282                if (req->pkey != sidr_param->bth_pkey)
1283                        pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
1284                                            "RDMA CMA: in the future this may cause the request to be dropped\n",
1285                                            sidr_param->bth_pkey, req->pkey);
1286                break;
1287        default:
1288                return -EINVAL;
1289        }
1290
1291        return 0;
1292}
1293
1294static bool validate_ipv4_net_dev(struct net_device *net_dev,
1295                                  const struct sockaddr_in *dst_addr,
1296                                  const struct sockaddr_in *src_addr)
1297{
1298        __be32 daddr = dst_addr->sin_addr.s_addr,
1299               saddr = src_addr->sin_addr.s_addr;
1300        struct fib_result res;
1301        struct flowi4 fl4;
1302        int err;
1303        bool ret;
1304
1305        if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1306            ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
1307            ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
1308            ipv4_is_loopback(saddr))
1309                return false;
1310
1311        memset(&fl4, 0, sizeof(fl4));
1312        fl4.flowi4_iif = net_dev->ifindex;
1313        fl4.daddr = daddr;
1314        fl4.saddr = saddr;
1315
1316        rcu_read_lock();
1317        err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
1318        ret = err == 0 && FIB_RES_DEV(res) == net_dev;
1319        rcu_read_unlock();
1320
1321        return ret;
1322}
1323
1324static bool validate_ipv6_net_dev(struct net_device *net_dev,
1325                                  const struct sockaddr_in6 *dst_addr,
1326                                  const struct sockaddr_in6 *src_addr)
1327{
1328#if IS_ENABLED(CONFIG_IPV6)
1329        const int strict = ipv6_addr_type(&dst_addr->sin6_addr) &
1330                           IPV6_ADDR_LINKLOCAL;
1331        struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
1332                                         &src_addr->sin6_addr, net_dev->ifindex,
1333                                         strict);
1334        bool ret;
1335
1336        if (!rt)
1337                return false;
1338
1339        ret = rt->rt6i_idev->dev == net_dev;
1340        ip6_rt_put(rt);
1341
1342        return ret;
1343#else
1344        return false;
1345#endif
1346}
1347
1348static bool validate_net_dev(struct net_device *net_dev,
1349                             const struct sockaddr *daddr,
1350                             const struct sockaddr *saddr)
1351{
1352        const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
1353        const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
1354        const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
1355        const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;
1356
1357        switch (daddr->sa_family) {
1358        case AF_INET:
1359                return saddr->sa_family == AF_INET &&
1360                       validate_ipv4_net_dev(net_dev, daddr4, saddr4);
1361
1362        case AF_INET6:
1363                return saddr->sa_family == AF_INET6 &&
1364                       validate_ipv6_net_dev(net_dev, daddr6, saddr6);
1365
1366        default:
1367                return false;
1368        }
1369}
1370
1371static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
1372                                          const struct cma_req_info *req)
1373{
1374        struct sockaddr_storage listen_addr_storage, src_addr_storage;
1375        struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
1376                        *src_addr = (struct sockaddr *)&src_addr_storage;
1377        struct net_device *net_dev;
1378        const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
1379        int err;
1380
1381        err = cma_save_ip_info(listen_addr, src_addr, ib_event,
1382                               req->service_id);
1383        if (err)
1384                return ERR_PTR(err);
1385
1386        net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
1387                                           gid, listen_addr);
1388        if (!net_dev)
1389                return ERR_PTR(-ENODEV);
1390
1391        if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
1392                dev_put(net_dev);
1393                return ERR_PTR(-EHOSTUNREACH);
1394        }
1395
1396        return net_dev;
1397}
1398
1399static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
1400{
1401        return (be64_to_cpu(service_id) >> 16) & 0xffff;
1402}
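
/*
 * Editor's note (illustrative, not part of the original file): for the
 * IP-based port spaces the IB service ID carries the port space in bits
 * 31:16 and the port number in bits 15:0, which is what
 * cma_port_from_service_id() and rdma_ps_from_service_id() extract. For
 * example, a service ID whose low 32 bits are 0x01060400 decodes to port
 * space 0x0106 (RDMA_PS_TCP) and port 0x0400 (1024).
 */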
1403
1404static bool cma_match_private_data(struct rdma_id_private *id_priv,
1405                                   const struct cma_hdr *hdr)
1406{
1407        struct sockaddr *addr = cma_src_addr(id_priv);
1408        __be32 ip4_addr;
1409        struct in6_addr ip6_addr;
1410
1411        if (cma_any_addr(addr) && !id_priv->afonly)
1412                return true;
1413
1414        switch (addr->sa_family) {
1415        case AF_INET:
1416                ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
1417                if (cma_get_ip_ver(hdr) != 4)
1418                        return false;
1419                if (!cma_any_addr(addr) &&
1420                    hdr->dst_addr.ip4.addr != ip4_addr)
1421                        return false;
1422                break;
1423        case AF_INET6:
1424                ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
1425                if (cma_get_ip_ver(hdr) != 6)
1426                        return false;
1427                if (!cma_any_addr(addr) &&
1428                    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
1429                        return false;
1430                break;
1431        case AF_IB:
1432                return true;
1433        default:
1434                return false;
1435        }
1436
1437        return true;
1438}
1439
1440static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
1441{
1442        enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
1443        enum rdma_transport_type transport =
1444                rdma_node_get_transport(device->node_type);
1445
1446        return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
1447}
1448
1449static bool cma_protocol_roce(const struct rdma_cm_id *id)
1450{
1451        struct ib_device *device = id->device;
1452        const int port_num = id->port_num ?: rdma_start_port(device);
1453
1454        return cma_protocol_roce_dev_port(device, port_num);
1455}
1456
1457static bool cma_match_net_dev(const struct rdma_cm_id *id,
1458                              const struct net_device *net_dev,
1459                              u8 port_num)
1460{
1461        const struct rdma_addr *addr = &id->route.addr;
1462
1463        if (!net_dev)
1464                /* This request is an AF_IB request or a RoCE request */
1465                return (!id->port_num || id->port_num == port_num) &&
1466                       (addr->src_addr.ss_family == AF_IB ||
1467                        cma_protocol_roce_dev_port(id->device, port_num));
1468
1469        return !addr->dev_addr.bound_dev_if ||
1470               (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
1471                addr->dev_addr.bound_dev_if == net_dev->ifindex);
1472}
1473
1474static struct rdma_id_private *cma_find_listener(
1475                const struct rdma_bind_list *bind_list,
1476                const struct ib_cm_id *cm_id,
1477                const struct ib_cm_event *ib_event,
1478                const struct cma_req_info *req,
1479                const struct net_device *net_dev)
1480{
1481        struct rdma_id_private *id_priv, *id_priv_dev;
1482
1483        if (!bind_list)
1484                return ERR_PTR(-EINVAL);
1485
1486        hlist_for_each_entry(id_priv, &bind_list->owners, node) {
1487                if (cma_match_private_data(id_priv, ib_event->private_data)) {
1488                        if (id_priv->id.device == cm_id->device &&
1489                            cma_match_net_dev(&id_priv->id, net_dev, req->port))
1490                                return id_priv;
1491                        list_for_each_entry(id_priv_dev,
1492                                            &id_priv->listen_list,
1493                                            listen_list) {
1494                                if (id_priv_dev->id.device == cm_id->device &&
1495                                    cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
1496                                        return id_priv_dev;
1497                        }
1498                }
1499        }
1500
1501        return ERR_PTR(-EINVAL);
1502}
1503
1504static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
1505                                                 struct ib_cm_event *ib_event,
1506                                                 struct net_device **net_dev)
1507{
1508        struct cma_req_info req;
1509        struct rdma_bind_list *bind_list;
1510        struct rdma_id_private *id_priv;
1511        int err;
1512
1513        err = cma_save_req_info(ib_event, &req);
1514        if (err)
1515                return ERR_PTR(err);
1516
1517        *net_dev = cma_get_net_dev(ib_event, &req);
1518        if (IS_ERR(*net_dev)) {
1519                if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
1520                        /* Assuming the protocol is AF_IB */
1521                        *net_dev = NULL;
1522                } else if (cma_protocol_roce_dev_port(req.device, req.port)) {
1523                        /* TODO find the net dev matching the request parameters
1524                         * through the RoCE GID table */
1525                        *net_dev = NULL;
1526                } else {
1527                        return ERR_CAST(*net_dev);
1528                }
1529        }
1530
1531        bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
1532                                rdma_ps_from_service_id(req.service_id),
1533                                cma_port_from_service_id(req.service_id));
1534        id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
1535        if (IS_ERR(id_priv) && *net_dev) {
1536                dev_put(*net_dev);
1537                *net_dev = NULL;
1538        }
1539
1540        return id_priv;
1541}
1542
1543static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
1544{
1545        return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
1546}
1547
1548static void cma_cancel_route(struct rdma_id_private *id_priv)
1549{
1550        if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) {
1551                if (id_priv->query)
1552                        ib_sa_cancel_query(id_priv->query_id, id_priv->query);
1553        }
1554}
1555
1556static void cma_cancel_listens(struct rdma_id_private *id_priv)
1557{
1558        struct rdma_id_private *dev_id_priv;
1559
1560        /*
1561         * Remove from listen_any_list to prevent added devices from spawning
1562         * additional listen requests.
1563         */
1564        mutex_lock(&lock);
1565        list_del(&id_priv->list);
1566
1567        while (!list_empty(&id_priv->listen_list)) {
1568                dev_id_priv = list_entry(id_priv->listen_list.next,
1569                                         struct rdma_id_private, listen_list);
1570                /* sync with device removal to avoid duplicate destruction */
1571                list_del_init(&dev_id_priv->list);
1572                list_del(&dev_id_priv->listen_list);
1573                mutex_unlock(&lock);
1574
1575                rdma_destroy_id(&dev_id_priv->id);
1576                mutex_lock(&lock);
1577        }
1578        mutex_unlock(&lock);
1579}
1580
1581static void cma_cancel_operation(struct rdma_id_private *id_priv,
1582                                 enum rdma_cm_state state)
1583{
1584        switch (state) {
1585        case RDMA_CM_ADDR_QUERY:
1586                rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
1587                break;
1588        case RDMA_CM_ROUTE_QUERY:
1589                cma_cancel_route(id_priv);
1590                break;
1591        case RDMA_CM_LISTEN:
1592                if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
1593                        cma_cancel_listens(id_priv);
1594                break;
1595        default:
1596                break;
1597        }
1598}
1599
1600static void cma_release_port(struct rdma_id_private *id_priv)
1601{
1602        struct rdma_bind_list *bind_list = id_priv->bind_list;
1603        struct net *net = id_priv->id.route.addr.dev_addr.net;
1604
1605        if (!bind_list)
1606                return;
1607
1608        mutex_lock(&lock);
1609        hlist_del(&id_priv->node);
1610        if (hlist_empty(&bind_list->owners)) {
1611                cma_ps_remove(net, bind_list->ps, bind_list->port);
1612                kfree(bind_list);
1613        }
1614        mutex_unlock(&lock);
1615}
1616
1617static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
1618{
1619        struct cma_multicast *mc;
1620
1621        while (!list_empty(&id_priv->mc_list)) {
1622                mc = container_of(id_priv->mc_list.next,
1623                                  struct cma_multicast, list);
1624                list_del(&mc->list);
1625                if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
1626                                      id_priv->id.port_num)) {
1627                        ib_sa_free_multicast(mc->multicast.ib);
1628                        kfree(mc);
1629                } else {
1630                        if (mc->igmp_joined) {
1631                                struct rdma_dev_addr *dev_addr =
1632                                        &id_priv->id.route.addr.dev_addr;
1633                                struct net_device *ndev = NULL;
1634
1635                                if (dev_addr->bound_dev_if)
1636                                        ndev = dev_get_by_index(&init_net,
1637                                                                dev_addr->bound_dev_if);
1638                                if (ndev) {
1639                                        cma_igmp_send(ndev,
1640                                                      &mc->multicast.ib->rec.mgid,
1641                                                      false);
1642                                        dev_put(ndev);
1643                                }
1644                        }
1645                        kref_put(&mc->mcref, release_mc);
1646                }
1647        }
1648}
1649
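    /*
     * Tear down an rdma_cm_id: move it to DESTROYING, cancel whatever
     * operation was in flight for its previous state, wait for any running
     * event handler to drop handler_mutex, destroy the underlying IB/iWARP
     * CM id, leave multicast groups, release the device and port, and wait
     * for the reference count to reach zero before freeing.
     */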
1650void rdma_destroy_id(struct rdma_cm_id *id)
1651{
1652        struct rdma_id_private *id_priv;
1653        enum rdma_cm_state state;
1654
1655        id_priv = container_of(id, struct rdma_id_private, id);
1656        state = cma_exch(id_priv, RDMA_CM_DESTROYING);
1657        cma_cancel_operation(id_priv, state);
1658
1659        /*
1660         * Wait for any active callback to finish.  New callbacks will find
1661         * the id_priv state set to destroying and abort.
1662         */
1663        mutex_lock(&id_priv->handler_mutex);
1664        mutex_unlock(&id_priv->handler_mutex);
1665
1666        if (id_priv->cma_dev) {
1667                if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
1668                        if (id_priv->cm_id.ib)
1669                                ib_destroy_cm_id(id_priv->cm_id.ib);
1670                } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
1671                        if (id_priv->cm_id.iw)
1672                                iw_destroy_cm_id(id_priv->cm_id.iw);
1673                }
1674                cma_leave_mc_groups(id_priv);
1675                cma_release_dev(id_priv);
1676        }
1677
1678        cma_release_port(id_priv);
1679        cma_deref_id(id_priv);
1680        wait_for_completion(&id_priv->comp);
1681
1682        if (id_priv->internal_id)
1683                cma_deref_id(id_priv->id.context);
1684
1685        kfree(id_priv->id.route.path_rec);
1686        put_net(id_priv->id.route.addr.dev_addr.net);
1687        kfree(id_priv);
1688}
1689EXPORT_SYMBOL(rdma_destroy_id);
1690
1691static int cma_rep_recv(struct rdma_id_private *id_priv)
1692{
1693        int ret;
1694
1695        ret = cma_modify_qp_rtr(id_priv, NULL);
1696        if (ret)
1697                goto reject;
1698
1699        ret = cma_modify_qp_rts(id_priv, NULL);
1700        if (ret)
1701                goto reject;
1702
1703        ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
1704        if (ret)
1705                goto reject;
1706
1707        return 0;
1708reject:
1709        pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
1710        cma_modify_qp_err(id_priv);
1711        ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
1712                       NULL, 0, NULL, 0);
1713        return ret;
1714}
1715
1716static void cma_set_rep_event_data(struct rdma_cm_event *event,
1717                                   struct ib_cm_rep_event_param *rep_data,
1718                                   void *private_data)
1719{
1720        event->param.conn.private_data = private_data;
1721        event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
1722        event->param.conn.responder_resources = rep_data->responder_resources;
1723        event->param.conn.initiator_depth = rep_data->initiator_depth;
1724        event->param.conn.flow_control = rep_data->flow_control;
1725        event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
1726        event->param.conn.srq = rep_data->srq;
1727        event->param.conn.qp_num = rep_data->remote_qpn;
1728}
1729
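    /*
     * Per-connection IB CM event handler.  Translates IB CM events (REP,
     * RTU, DREQ, REJ, timewait exit, ...) into RDMA CM events and passes
     * them to the user's event handler; a non-zero return from that
     * handler destroys the id.  Events arriving in the wrong connection
     * state are dropped under handler_mutex.
     */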
1730static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1731{
1732        struct rdma_id_private *id_priv = cm_id->context;
1733        struct rdma_cm_event event;
1734        int ret = 0;
1735
1736        mutex_lock(&id_priv->handler_mutex);
1737        if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
1738             id_priv->state != RDMA_CM_CONNECT) ||
1739            (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
1740             id_priv->state != RDMA_CM_DISCONNECT))
1741                goto out;
1742
1743        memset(&event, 0, sizeof event);
1744        switch (ib_event->event) {
1745        case IB_CM_REQ_ERROR:
1746        case IB_CM_REP_ERROR:
1747                event.event = RDMA_CM_EVENT_UNREACHABLE;
1748                event.status = -ETIMEDOUT;
1749                break;
1750        case IB_CM_REP_RECEIVED:
1751                if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
1752                    (id_priv->id.qp_type != IB_QPT_UD))
1753                        ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1754                if (id_priv->id.qp) {
1755                        event.status = cma_rep_recv(id_priv);
1756                        event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
1757                                                     RDMA_CM_EVENT_ESTABLISHED;
1758                } else {
1759                        event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
1760                }
1761                cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
1762                                       ib_event->private_data);
1763                break;
1764        case IB_CM_RTU_RECEIVED:
1765        case IB_CM_USER_ESTABLISHED:
1766                event.event = RDMA_CM_EVENT_ESTABLISHED;
1767                break;
1768        case IB_CM_DREQ_ERROR:
1769                event.status = -ETIMEDOUT; /* fall through */
1770        case IB_CM_DREQ_RECEIVED:
1771        case IB_CM_DREP_RECEIVED:
1772                if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
1773                                   RDMA_CM_DISCONNECT))
1774                        goto out;
1775                event.event = RDMA_CM_EVENT_DISCONNECTED;
1776                break;
1777        case IB_CM_TIMEWAIT_EXIT:
1778                event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
1779                break;
1780        case IB_CM_MRA_RECEIVED:
1781                /* ignore event */
1782                goto out;
1783        case IB_CM_REJ_RECEIVED:
1784                pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
1785                                                                                ib_event->param.rej_rcvd.reason));
1786                cma_modify_qp_err(id_priv);
1787                event.status = ib_event->param.rej_rcvd.reason;
1788                event.event = RDMA_CM_EVENT_REJECTED;
1789                event.param.conn.private_data = ib_event->private_data;
1790                event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
1791                break;
1792        default:
1793                pr_err("RDMA CMA: unexpected IB CM event: %d\n",
1794                       ib_event->event);
1795                goto out;
1796        }
1797
1798        ret = id_priv->id.event_handler(&id_priv->id, &event);
1799        if (ret) {
1800                /* Destroy the CM ID by returning a non-zero value. */
1801                id_priv->cm_id.ib = NULL;
1802                cma_exch(id_priv, RDMA_CM_DESTROYING);
1803                mutex_unlock(&id_priv->handler_mutex);
1804                rdma_destroy_id(&id_priv->id);
1805                return ret;
1806        }
1807out:
1808        mutex_unlock(&id_priv->handler_mutex);
1809        return ret;
1810}
1811
1812static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1813                                               struct ib_cm_event *ib_event,
1814                                               struct net_device *net_dev)
1815{
1816        struct rdma_id_private *id_priv;
1817        struct rdma_cm_id *id;
1818        struct rdma_route *rt;
1819        const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
1820        struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
1821        const __be64 service_id =
1822                ib_event->param.req_rcvd.primary_path->service_id;
1823        int ret;
1824
1825        id = rdma_create_id(listen_id->route.addr.dev_addr.net,
1826                            listen_id->event_handler, listen_id->context,
1827                            listen_id->ps, ib_event->param.req_rcvd.qp_type);
1828        if (IS_ERR(id))
1829                return NULL;
1830
1831        id_priv = container_of(id, struct rdma_id_private, id);
1832        if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
1833                              (struct sockaddr *)&id->route.addr.dst_addr,
1834                              listen_id, ib_event, ss_family, service_id))
1835                goto err;
1836
1837        rt = &id->route;
1838        rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
1839        rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
1840                               GFP_KERNEL);
1841        if (!rt->path_rec)
1842                goto err;
1843
1844        rt->path_rec[0] = *path;
1845        if (rt->num_paths == 2)
1846                rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1847
1848        if (net_dev) {
1849                ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
1850                if (ret)
1851                        goto err;
1852        } else {
1853                if (!cma_protocol_roce(listen_id) &&
1854                    cma_any_addr(cma_src_addr(id_priv))) {
1855                        rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
1856                        rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1857                        ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
1858                } else if (!cma_any_addr(cma_src_addr(id_priv))) {
1859                        ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
1860                        if (ret)
1861                                goto err;
1862                }
1863        }
1864        rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1865
1866        id_priv->state = RDMA_CM_CONNECT;
1867        return id_priv;
1868
1869err:
1870        rdma_destroy_id(id);
1871        return NULL;
1872}
1873
1874static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1875                                              struct ib_cm_event *ib_event,
1876                                              struct net_device *net_dev)
1877{
1878        struct rdma_id_private *id_priv;
1879        struct rdma_cm_id *id;
1880        const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
1881        struct net *net = listen_id->route.addr.dev_addr.net;
1882        int ret;
1883
1884        id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
1885                            listen_id->ps, IB_QPT_UD);
1886        if (IS_ERR(id))
1887                return NULL;
1888
1889        id_priv = container_of(id, struct rdma_id_private, id);
1890        if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
1891                              (struct sockaddr *)&id->route.addr.dst_addr,
1892                              listen_id, ib_event, ss_family,
1893                              ib_event->param.sidr_req_rcvd.service_id))
1894                goto err;
1895
1896        if (net_dev) {
1897                ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
1898                if (ret)
1899                        goto err;
1900        } else {
1901                if (!cma_any_addr(cma_src_addr(id_priv))) {
1902                        ret = cma_translate_addr(cma_src_addr(id_priv),
1903                                                 &id->route.addr.dev_addr);
1904                        if (ret)
1905                                goto err;
1906                }
1907        }
1908
1909        id_priv->state = RDMA_CM_CONNECT;
1910        return id_priv;
1911err:
1912        rdma_destroy_id(id);
1913        return NULL;
1914}
1915
1916static void cma_set_req_event_data(struct rdma_cm_event *event,
1917                                   struct ib_cm_req_event_param *req_data,
1918                                   void *private_data, int offset)
1919{
1920        event->param.conn.private_data = private_data + offset;
1921        event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1922        event->param.conn.responder_resources = req_data->responder_resources;
1923        event->param.conn.initiator_depth = req_data->initiator_depth;
1924        event->param.conn.flow_control = req_data->flow_control;
1925        event->param.conn.retry_count = req_data->retry_count;
1926        event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1927        event->param.conn.srq = req_data->srq;
1928        event->param.conn.qp_num = req_data->remote_qpn;
1929}
1930
1931static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
1932{
1933        return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
1934                 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
1935                ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
1936                 (id->qp_type == IB_QPT_UD)) ||
1937                (!id->qp_type));
1938}
1939
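    /*
     * IB CM callback for incoming REQ/SIDR_REQ messages.  The listening id
     * is looked up from the event, a child id is created in the CONNECT
     * state, the matching device is acquired, and the user's handler is
     * invoked with RDMA_CM_EVENT_CONNECT_REQUEST.  If the handler returns
     * non-zero the child id is destroyed again.
     */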
1940static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1941{
1942        struct rdma_id_private *listen_id, *conn_id = NULL;
1943        struct rdma_cm_event event;
1944        struct net_device *net_dev;
1945        int offset, ret;
1946
1947        listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
1948        if (IS_ERR(listen_id))
1949                return PTR_ERR(listen_id);
1950
1951        if (!cma_check_req_qp_type(&listen_id->id, ib_event)) {
1952                ret = -EINVAL;
1953                goto net_dev_put;
1954        }
1955
1956        mutex_lock(&listen_id->handler_mutex);
1957        if (listen_id->state != RDMA_CM_LISTEN) {
1958                ret = -ECONNABORTED;
1959                goto err1;
1960        }
1961
1962        memset(&event, 0, sizeof event);
1963        offset = cma_user_data_offset(listen_id);
1964        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1965        if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
1966                conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev);
1967                event.param.ud.private_data = ib_event->private_data + offset;
1968                event.param.ud.private_data_len =
1969                                IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1970        } else {
1971                conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev);
1972                cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1973                                       ib_event->private_data, offset);
1974        }
1975        if (!conn_id) {
1976                ret = -ENOMEM;
1977                goto err1;
1978        }
1979
1980        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1981        ret = cma_acquire_dev(conn_id, listen_id);
1982        if (ret)
1983                goto err2;
1984
1985        conn_id->cm_id.ib = cm_id;
1986        cm_id->context = conn_id;
1987        cm_id->cm_handler = cma_ib_handler;
1988
1989        /*
1990         * Protect against the user destroying conn_id from another thread
1991         * until we're done accessing it.
1992         */
1993        atomic_inc(&conn_id->refcount);
1994        ret = conn_id->id.event_handler(&conn_id->id, &event);
1995        if (ret)
1996                goto err3;
1997        /*
1998         * Acquire mutex to prevent user executing rdma_destroy_id()
1999         * while we're accessing the cm_id.
2000         */
2001        mutex_lock(&lock);
2002        if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
2003            (conn_id->id.qp_type != IB_QPT_UD))
2004                ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
2005        mutex_unlock(&lock);
2006        mutex_unlock(&conn_id->handler_mutex);
2007        mutex_unlock(&listen_id->handler_mutex);
2008        cma_deref_id(conn_id);
2009        if (net_dev)
2010                dev_put(net_dev);
2011        return 0;
2012
2013err3:
2014        cma_deref_id(conn_id);
2015        /* Destroy the CM ID by returning a non-zero value. */
2016        conn_id->cm_id.ib = NULL;
2017err2:
2018        cma_exch(conn_id, RDMA_CM_DESTROYING);
2019        mutex_unlock(&conn_id->handler_mutex);
2020err1:
2021        mutex_unlock(&listen_id->handler_mutex);
2022        if (conn_id)
2023                rdma_destroy_id(&conn_id->id);
2024
2025net_dev_put:
2026        if (net_dev)
2027                dev_put(net_dev);
2028
2029        return ret;
2030}
2031
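    /*
     * For AF_IB the caller supplies the service ID directly in sib_sid.
     * For IP addresses it is synthesized from the port space and port
     * number: as an illustrative example, RDMA_PS_TCP (0x0106 in
     * rdma_cm.h) with destination port 5000 (0x1388) yields the service
     * ID 0x01061388, stored big-endian.
     */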
2032__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
2033{
2034        if (addr->sa_family == AF_IB)
2035                return ((struct sockaddr_ib *) addr)->sib_sid;
2036
2037        return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
2038}
2039EXPORT_SYMBOL(rdma_get_service_id);
2040
2041static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
2042{
2043        struct rdma_id_private *id_priv = iw_id->context;
2044        struct rdma_cm_event event;
2045        int ret = 0;
2046        struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2047        struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
2048
2049        mutex_lock(&id_priv->handler_mutex);
2050        if (id_priv->state != RDMA_CM_CONNECT)
2051                goto out;
2052
2053        memset(&event, 0, sizeof event);
2054        switch (iw_event->event) {
2055        case IW_CM_EVENT_CLOSE:
2056                event.event = RDMA_CM_EVENT_DISCONNECTED;
2057                break;
2058        case IW_CM_EVENT_CONNECT_REPLY:
2059                memcpy(cma_src_addr(id_priv), laddr,
2060                       rdma_addr_size(laddr));
2061                memcpy(cma_dst_addr(id_priv), raddr,
2062                       rdma_addr_size(raddr));
2063                switch (iw_event->status) {
2064                case 0:
2065                        event.event = RDMA_CM_EVENT_ESTABLISHED;
2066                        event.param.conn.initiator_depth = iw_event->ird;
2067                        event.param.conn.responder_resources = iw_event->ord;
2068                        break;
2069                case -ECONNRESET:
2070                case -ECONNREFUSED:
2071                        event.event = RDMA_CM_EVENT_REJECTED;
2072                        break;
2073                case -ETIMEDOUT:
2074                        event.event = RDMA_CM_EVENT_UNREACHABLE;
2075                        break;
2076                default:
2077                        event.event = RDMA_CM_EVENT_CONNECT_ERROR;
2078                        break;
2079                }
2080                break;
2081        case IW_CM_EVENT_ESTABLISHED:
2082                event.event = RDMA_CM_EVENT_ESTABLISHED;
2083                event.param.conn.initiator_depth = iw_event->ird;
2084                event.param.conn.responder_resources = iw_event->ord;
2085                break;
2086        default:
2087                BUG_ON(1);
2088        }
2089
2090        event.status = iw_event->status;
2091        event.param.conn.private_data = iw_event->private_data;
2092        event.param.conn.private_data_len = iw_event->private_data_len;
2093        ret = id_priv->id.event_handler(&id_priv->id, &event);
2094        if (ret) {
2095                /* Destroy the CM ID by returning a non-zero value. */
2096                id_priv->cm_id.iw = NULL;
2097                cma_exch(id_priv, RDMA_CM_DESTROYING);
2098                mutex_unlock(&id_priv->handler_mutex);
2099                rdma_destroy_id(&id_priv->id);
2100                return ret;
2101        }
2102
2103out:
2104        mutex_unlock(&id_priv->handler_mutex);
2105        return ret;
2106}
2107
2108static int iw_conn_req_handler(struct iw_cm_id *cm_id,
2109                               struct iw_cm_event *iw_event)
2110{
2111        struct rdma_cm_id *new_cm_id;
2112        struct rdma_id_private *listen_id, *conn_id;
2113        struct rdma_cm_event event;
2114        int ret = -ECONNABORTED;
2115        struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2116        struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
2117
2118        listen_id = cm_id->context;
2119
2120        mutex_lock(&listen_id->handler_mutex);
2121        if (listen_id->state != RDMA_CM_LISTEN)
2122                goto out;
2123
2124        /* Create a new RDMA id for the new IW CM ID */
2125        new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
2126                                   listen_id->id.event_handler,
2127                                   listen_id->id.context,
2128                                   RDMA_PS_TCP, IB_QPT_RC);
2129        if (IS_ERR(new_cm_id)) {
2130                ret = -ENOMEM;
2131                goto out;
2132        }
2133        conn_id = container_of(new_cm_id, struct rdma_id_private, id);
2134        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2135        conn_id->state = RDMA_CM_CONNECT;
2136
2137        ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
2138        if (ret) {
2139                mutex_unlock(&conn_id->handler_mutex);
2140                rdma_destroy_id(new_cm_id);
2141                goto out;
2142        }
2143
2144        ret = cma_acquire_dev(conn_id, listen_id);
2145        if (ret) {
2146                mutex_unlock(&conn_id->handler_mutex);
2147                rdma_destroy_id(new_cm_id);
2148                goto out;
2149        }
2150
2151        conn_id->cm_id.iw = cm_id;
2152        cm_id->context = conn_id;
2153        cm_id->cm_handler = cma_iw_handler;
2154
2155        memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
2156        memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
2157
2158        memset(&event, 0, sizeof event);
2159        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
2160        event.param.conn.private_data = iw_event->private_data;
2161        event.param.conn.private_data_len = iw_event->private_data_len;
2162        event.param.conn.initiator_depth = iw_event->ird;
2163        event.param.conn.responder_resources = iw_event->ord;
2164
2165        /*
2166         * Protect against the user destroying conn_id from another thread
2167         * until we're done accessing it.
2168         */
2169        atomic_inc(&conn_id->refcount);
2170        ret = conn_id->id.event_handler(&conn_id->id, &event);
2171        if (ret) {
2172                /* User wants to destroy the CM ID */
2173                conn_id->cm_id.iw = NULL;
2174                cma_exch(conn_id, RDMA_CM_DESTROYING);
2175                mutex_unlock(&conn_id->handler_mutex);
2176                cma_deref_id(conn_id);
2177                rdma_destroy_id(&conn_id->id);
2178                goto out;
2179        }
2180
2181        mutex_unlock(&conn_id->handler_mutex);
2182        cma_deref_id(conn_id);
2183
2184out:
2185        mutex_unlock(&listen_id->handler_mutex);
2186        return ret;
2187}
2188
2189static int cma_ib_listen(struct rdma_id_private *id_priv)
2190{
2191        struct sockaddr *addr;
2192        struct ib_cm_id *id;
2193        __be64 svc_id;
2194
2195        addr = cma_src_addr(id_priv);
2196        svc_id = rdma_get_service_id(&id_priv->id, addr);
2197        id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id);
2198        if (IS_ERR(id))
2199                return PTR_ERR(id);
2200        id_priv->cm_id.ib = id;
2201
2202        return 0;
2203}
2204
2205static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
2206{
2207        int ret;
2208        struct iw_cm_id *id;
2209
2210        id = iw_create_cm_id(id_priv->id.device,
2211                             iw_conn_req_handler,
2212                             id_priv);
2213        if (IS_ERR(id))
2214                return PTR_ERR(id);
2215
2216        id->tos = id_priv->tos;
2217        id_priv->cm_id.iw = id;
2218
2219        memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
2220               rdma_addr_size(cma_src_addr(id_priv)));
2221
2222        ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
2223
2224        if (ret) {
2225                iw_destroy_cm_id(id_priv->cm_id.iw);
2226                id_priv->cm_id.iw = NULL;
2227        }
2228
2229        return ret;
2230}
2231
2232static int cma_listen_handler(struct rdma_cm_id *id,
2233                              struct rdma_cm_event *event)
2234{
2235        struct rdma_id_private *id_priv = id->context;
2236
2237        id->context = id_priv->id.context;
2238        id->event_handler = id_priv->id.event_handler;
2239        return id_priv->id.event_handler(id, event);
2240}
2241
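    /*
     * Create a per-device child id for a wildcard listener and start it
     * listening on the given device.  The child shares the parent's source
     * address, port space and backlog, forwards events to the parent's
     * handler through cma_listen_handler(), and is tracked on the parent's
     * listen_list so it is destroyed together with the parent.
     */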
2242static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2243                              struct cma_device *cma_dev)
2244{
2245        struct rdma_id_private *dev_id_priv;
2246        struct rdma_cm_id *id;
2247        struct net *net = id_priv->id.route.addr.dev_addr.net;
2248        int ret;
2249
2250        if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
2251                return;
2252
2253        id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
2254                            id_priv->id.qp_type);
2255        if (IS_ERR(id))
2256                return;
2257
2258        dev_id_priv = container_of(id, struct rdma_id_private, id);
2259
2260        dev_id_priv->state = RDMA_CM_ADDR_BOUND;
2261        memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
2262               rdma_addr_size(cma_src_addr(id_priv)));
2263
2264        _cma_attach_to_dev(dev_id_priv, cma_dev);
2265        list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
2266        atomic_inc(&id_priv->refcount);
2267        dev_id_priv->internal_id = 1;
2268        dev_id_priv->afonly = id_priv->afonly;
2269
2270        ret = rdma_listen(id, id_priv->backlog);
2271        if (ret)
2272                pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
2273                        ret, cma_dev->device->name);
2274}
2275
2276static void cma_listen_on_all(struct rdma_id_private *id_priv)
2277{
2278        struct cma_device *cma_dev;
2279
2280        mutex_lock(&lock);
2281        list_add_tail(&id_priv->list, &listen_any_list);
2282        list_for_each_entry(cma_dev, &dev_list, list)
2283                cma_listen_on_dev(id_priv, cma_dev);
2284        mutex_unlock(&lock);
2285}
2286
2287void rdma_set_service_type(struct rdma_cm_id *id, int tos)
2288{
2289        struct rdma_id_private *id_priv;
2290
2291        id_priv = container_of(id, struct rdma_id_private, id);
2292        id_priv->tos = (u8) tos;
2293        id_priv->tos_set = true;
2294}
2295EXPORT_SYMBOL(rdma_set_service_type);
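    /*
     * Illustrative usage (not part of this file): a ULP that wants a
     * specific traffic class calls rdma_set_service_type(id, tos) after
     * creating the id and before rdma_resolve_route(), since the value is
     * folded into the path record (qos_class / traffic_class / SL) when
     * the route is resolved below.
     */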
2296
2297static void cma_query_handler(int status, struct sa_path_rec *path_rec,
2298                              void *context)
2299{
2300        struct cma_work *work = context;
2301        struct rdma_route *route;
2302
2303        route = &work->id->id.route;
2304
2305        if (!status) {
2306                route->num_paths = 1;
2307                *route->path_rec = *path_rec;
2308        } else {
2309                work->old_state = RDMA_CM_ROUTE_QUERY;
2310                work->new_state = RDMA_CM_ADDR_RESOLVED;
2311                work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
2312                work->event.status = status;
2313                pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
2314                                     status);
2315        }
2316
2317        queue_work(cma_wq, &work->work);
2318}
2319
2320static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
2321                              struct cma_work *work)
2322{
2323        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2324        struct sa_path_rec path_rec;
2325        ib_sa_comp_mask comp_mask;
2326        struct sockaddr_in6 *sin6;
2327        struct sockaddr_ib *sib;
2328
2329        memset(&path_rec, 0, sizeof path_rec);
2330
2331        if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num))
2332                path_rec.rec_type = SA_PATH_REC_TYPE_OPA;
2333        else
2334                path_rec.rec_type = SA_PATH_REC_TYPE_IB;
2335        rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
2336        rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
2337        path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2338        path_rec.numb_path = 1;
2339        path_rec.reversible = 1;
2340        path_rec.service_id = rdma_get_service_id(&id_priv->id,
2341                                                  cma_dst_addr(id_priv));
2342
2343        comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
2344                    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
2345                    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
2346
2347        switch (cma_family(id_priv)) {
2348        case AF_INET:
2349                path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
2350                comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
2351                break;
2352        case AF_INET6:
2353                sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
2354                path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
2355                comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
2356                break;
2357        case AF_IB:
2358                sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
2359                path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
2360                comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
2361                break;
2362        }
2363
2364        id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
2365                                               id_priv->id.port_num, &path_rec,
2366                                               comp_mask, timeout_ms,
2367                                               GFP_KERNEL, cma_query_handler,
2368                                               work, &id_priv->query);
2369
2370        return (id_priv->query_id < 0) ? id_priv->query_id : 0;
2371}
2372
2373static void cma_work_handler(struct work_struct *_work)
2374{
2375        struct cma_work *work = container_of(_work, struct cma_work, work);
2376        struct rdma_id_private *id_priv = work->id;
2377        int destroy = 0;
2378
2379        mutex_lock(&id_priv->handler_mutex);
2380        if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
2381                goto out;
2382
2383        if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2384                cma_exch(id_priv, RDMA_CM_DESTROYING);
2385                destroy = 1;
2386        }
2387out:
2388        mutex_unlock(&id_priv->handler_mutex);
2389        cma_deref_id(id_priv);
2390        if (destroy)
2391                rdma_destroy_id(&id_priv->id);
2392        kfree(work);
2393}
2394
2395static void cma_ndev_work_handler(struct work_struct *_work)
2396{
2397        struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
2398        struct rdma_id_private *id_priv = work->id;
2399        int destroy = 0;
2400
2401        mutex_lock(&id_priv->handler_mutex);
2402        if (id_priv->state == RDMA_CM_DESTROYING ||
2403            id_priv->state == RDMA_CM_DEVICE_REMOVAL)
2404                goto out;
2405
2406        if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2407                cma_exch(id_priv, RDMA_CM_DESTROYING);
2408                destroy = 1;
2409        }
2410
2411out:
2412        mutex_unlock(&id_priv->handler_mutex);
2413        cma_deref_id(id_priv);
2414        if (destroy)
2415                rdma_destroy_id(&id_priv->id);
2416        kfree(work);
2417}
2418
2419static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
2420{
2421        struct rdma_route *route = &id_priv->id.route;
2422        struct cma_work *work;
2423        int ret;
2424
2425        work = kzalloc(sizeof *work, GFP_KERNEL);
2426        if (!work)
2427                return -ENOMEM;
2428
2429        work->id = id_priv;
2430        INIT_WORK(&work->work, cma_work_handler);
2431        work->old_state = RDMA_CM_ROUTE_QUERY;
2432        work->new_state = RDMA_CM_ROUTE_RESOLVED;
2433        work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2434
2435        route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
2436        if (!route->path_rec) {
2437                ret = -ENOMEM;
2438                goto err1;
2439        }
2440
2441        ret = cma_query_ib_route(id_priv, timeout_ms, work);
2442        if (ret)
2443                goto err2;
2444
2445        return 0;
2446err2:
2447        kfree(route->path_rec);
2448        route->path_rec = NULL;
2449err1:
2450        kfree(work);
2451        return ret;
2452}
2453
2454int rdma_set_ib_paths(struct rdma_cm_id *id,
2455                      struct sa_path_rec *path_rec, int num_paths)
2456{
2457        struct rdma_id_private *id_priv;
2458        int ret;
2459
2460        id_priv = container_of(id, struct rdma_id_private, id);
2461        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
2462                           RDMA_CM_ROUTE_RESOLVED))
2463                return -EINVAL;
2464
2465        id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
2466                                     GFP_KERNEL);
2467        if (!id->route.path_rec) {
2468                ret = -ENOMEM;
2469                goto err;
2470        }
2471
2472        id->route.num_paths = num_paths;
2473        return 0;
2474err:
2475        cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
2476        return ret;
2477}
2478EXPORT_SYMBOL(rdma_set_ib_paths);
2479
2480static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
2481{
2482        struct cma_work *work;
2483
2484        work = kzalloc(sizeof *work, GFP_KERNEL);
2485        if (!work)
2486                return -ENOMEM;
2487
2488        work->id = id_priv;
2489        INIT_WORK(&work->work, cma_work_handler);
2490        work->old_state = RDMA_CM_ROUTE_QUERY;
2491        work->new_state = RDMA_CM_ROUTE_RESOLVED;
2492        work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2493        queue_work(cma_wq, &work->work);
2494        return 0;
2495}
2496
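    /*
     * Map an IP ToS value to an Ethernet priority for RoCE: the ToS is
     * first converted to a socket priority, which is then mapped through
     * the device's traffic-class table when one is configured, or through
     * the VLAN egress priority map for VLAN devices.  Falls back to 0.
     */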
2497static int iboe_tos_to_sl(struct net_device *ndev, int tos)
2498{
2499        int prio;
2500        struct net_device *dev;
2501
2502        prio = rt_tos2priority(tos);
2503        dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
2504        if (dev->num_tc)
2505                return netdev_get_prio_tc_map(dev, prio);
2506
2507#if IS_ENABLED(CONFIG_VLAN_8021Q)
2508        if (is_vlan_dev(ndev))
2509                return (vlan_dev_get_egress_qos_mask(ndev, prio) &
2510                        VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
2511#endif
2512        return 0;
2513}
2514
2515static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
2516                                           unsigned long supported_gids,
2517                                           enum ib_gid_type default_gid)
2518{
2519        if ((network_type == RDMA_NETWORK_IPV4 ||
2520             network_type == RDMA_NETWORK_IPV6) &&
2521            test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
2522                return IB_GID_TYPE_ROCE_UDP_ENCAP;
2523
2524        return default_gid;
2525}
2526
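    /*
     * RoCE has no subnet administrator to query, so the path record is
     * synthesized locally: the GIDs are derived from the IP addresses, the
     * destination MAC comes from the resolved neighbour, and MTU, rate, SL
     * and hop limit are taken from the bound net_device and the ToS.
     */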
2527static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2528{
2529        struct rdma_route *route = &id_priv->id.route;
2530        struct rdma_addr *addr = &route->addr;
2531        struct cma_work *work;
2532        int ret;
2533        struct net_device *ndev = NULL;
2534        enum ib_gid_type gid_type = IB_GID_TYPE_IB;
2535        u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
2536                                        rdma_start_port(id_priv->cma_dev->device)];
2537        u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
2538
2539
2540        work = kzalloc(sizeof *work, GFP_KERNEL);
2541        if (!work)
2542                return -ENOMEM;
2543
2544        work->id = id_priv;
2545        INIT_WORK(&work->work, cma_work_handler);
2546
2547        route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
2548        if (!route->path_rec) {
2549                ret = -ENOMEM;
2550                goto err1;
2551        }
2552
2553        route->num_paths = 1;
2554
2555        if (addr->dev_addr.bound_dev_if) {
2556                unsigned long supported_gids;
2557
2558                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
2559                if (!ndev) {
2560                        ret = -ENODEV;
2561                        goto err2;
2562                }
2563
2564                supported_gids = roce_gid_type_mask_support(id_priv->id.device,
2565                                                            id_priv->id.port_num);
2566                gid_type = cma_route_gid_type(addr->dev_addr.network,
2567                                              supported_gids,
2568                                              id_priv->gid_type);
2569                route->path_rec->rec_type =
2570                        sa_conv_gid_to_pathrec_type(gid_type);
2571                sa_path_set_ndev(route->path_rec, &init_net);
2572                sa_path_set_ifindex(route->path_rec, ndev->ifindex);
2573        }
2574        if (!ndev) {
2575                ret = -ENODEV;
2576                goto err2;
2577        }
2578
2579        sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
2580
2581        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
2582                    &route->path_rec->sgid);
2583        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
2584                    &route->path_rec->dgid);
2585
2586        /* Use the hint from the IP stack to select the GID type */
2587        if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
2588                gid_type = ib_network_to_gid_type(addr->dev_addr.network);
2589        route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
2590
2591        if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
2592                /* TODO: get the hoplimit from the inet/inet6 device */
2593                route->path_rec->hop_limit = addr->dev_addr.hoplimit;
2594        else
2595                route->path_rec->hop_limit = 1;
2596        route->path_rec->reversible = 1;
2597        route->path_rec->pkey = cpu_to_be16(0xffff);
2598        route->path_rec->mtu_selector = IB_SA_EQ;
2599        route->path_rec->sl = iboe_tos_to_sl(ndev, tos);
2600        route->path_rec->traffic_class = tos;
2601        route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
2602        route->path_rec->rate_selector = IB_SA_EQ;
2603        route->path_rec->rate = iboe_get_rate(ndev);
2604        dev_put(ndev);
2605        route->path_rec->packet_life_time_selector = IB_SA_EQ;
2606        route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
2607        if (!route->path_rec->mtu) {
2608                ret = -EINVAL;
2609                goto err2;
2610        }
2611
2612        work->old_state = RDMA_CM_ROUTE_QUERY;
2613        work->new_state = RDMA_CM_ROUTE_RESOLVED;
2614        work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2615        work->event.status = 0;
2616
2617        queue_work(cma_wq, &work->work);
2618
2619        return 0;
2620
2621err2:
2622        kfree(route->path_rec);
2623        route->path_rec = NULL;
2624err1:
2625        kfree(work);
2626        return ret;
2627}
2628
2629int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
2630{
2631        struct rdma_id_private *id_priv;
2632        int ret;
2633
2634        id_priv = container_of(id, struct rdma_id_private, id);
2635        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
2636                return -EINVAL;
2637
2638        atomic_inc(&id_priv->refcount);
2639        if (rdma_cap_ib_sa(id->device, id->port_num))
2640                ret = cma_resolve_ib_route(id_priv, timeout_ms);
2641        else if (rdma_protocol_roce(id->device, id->port_num))
2642                ret = cma_resolve_iboe_route(id_priv);
2643        else if (rdma_protocol_iwarp(id->device, id->port_num))
2644                ret = cma_resolve_iw_route(id_priv, timeout_ms);
2645        else
2646                ret = -ENOSYS;
2647
2648        if (ret)
2649                goto err;
2650
2651        return 0;
2652err:
2653        cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
2654        cma_deref_id(id_priv);
2655        return ret;
2656}
2657EXPORT_SYMBOL(rdma_resolve_route);
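    /*
     * Illustrative active-side call order for a ULP (a sketch, not part of
     * this file): rdma_create_id() -> rdma_resolve_addr() -> wait for
     * RDMA_CM_EVENT_ADDR_RESOLVED -> rdma_resolve_route() -> wait for
     * RDMA_CM_EVENT_ROUTE_RESOLVED -> rdma_connect().  rdma_resolve_route()
     * only accepts an id in the ADDR_RESOLVED state, as enforced above.
     */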
2658
2659static void cma_set_loopback(struct sockaddr *addr)
2660{
2661        switch (addr->sa_family) {
2662        case AF_INET:
2663                ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
2664                break;
2665        case AF_INET6:
2666                ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
2667                              0, 0, 0, htonl(1));
2668                break;
2669        default:
2670                ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
2671                            0, 0, 0, htonl(1));
2672                break;
2673        }
2674}
2675
2676static int cma_bind_loopback(struct rdma_id_private *id_priv)
2677{
2678        struct cma_device *cma_dev, *cur_dev;
2679        union ib_gid gid;
2680        enum ib_port_state port_state;
2681        u16 pkey;
2682        int ret;
2683        u8 p;
2684
2685        cma_dev = NULL;
2686        mutex_lock(&lock);
2687        list_for_each_entry(cur_dev, &dev_list, list) {
2688                if (cma_family(id_priv) == AF_IB &&
2689                    !rdma_cap_ib_cm(cur_dev->device, 1))
2690                        continue;
2691
2692                if (!cma_dev)
2693                        cma_dev = cur_dev;
2694
2695                for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
2696                        if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
2697                            port_state == IB_PORT_ACTIVE) {
2698                                cma_dev = cur_dev;
2699                                goto port_found;
2700                        }
2701                }
2702        }
2703
2704        if (!cma_dev) {
2705                ret = -ENODEV;
2706                goto out;
2707        }
2708
2709        p = 1;
2710
2711port_found:
2712        ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
2713        if (ret)
2714                goto out;
2715
2716        ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
2717        if (ret)
2718                goto out;
2719
2720        id_priv->id.route.addr.dev_addr.dev_type =
2721                (rdma_protocol_ib(cma_dev->device, p)) ?
2722                ARPHRD_INFINIBAND : ARPHRD_ETHER;
2723
2724        rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2725        ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
2726        id_priv->id.port_num = p;
2727        cma_attach_to_dev(id_priv, cma_dev);
2728        cma_set_loopback(cma_src_addr(id_priv));
2729out:
2730        mutex_unlock(&lock);
2731        return ret;
2732}
2733
2734static void addr_handler(int status, struct sockaddr *src_addr,
2735                         struct rdma_dev_addr *dev_addr, void *context)
2736{
2737        struct rdma_id_private *id_priv = context;
2738        struct rdma_cm_event event;
2739
2740        memset(&event, 0, sizeof event);
2741        mutex_lock(&id_priv->handler_mutex);
2742        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
2743                           RDMA_CM_ADDR_RESOLVED))
2744                goto out;
2745
2746        memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
2747        if (!status && !id_priv->cma_dev) {
2748                status = cma_acquire_dev(id_priv, NULL);
2749                if (status)
2750                        pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
2751                                             status);
2752        } else {
2753                pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
2754        }
2755
2756        if (status) {
2757                if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
2758                                   RDMA_CM_ADDR_BOUND))
2759                        goto out;
2760                event.event = RDMA_CM_EVENT_ADDR_ERROR;
2761                event.status = status;
2762        } else
2763                event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2764
2765        if (id_priv->id.event_handler(&id_priv->id, &event)) {
2766                cma_exch(id_priv, RDMA_CM_DESTROYING);
2767                mutex_unlock(&id_priv->handler_mutex);
2768                cma_deref_id(id_priv);
2769                rdma_destroy_id(&id_priv->id);
2770                return;
2771        }
2772out:
2773        mutex_unlock(&id_priv->handler_mutex);
2774        cma_deref_id(id_priv);
2775}
2776
2777static int cma_resolve_loopback(struct rdma_id_private *id_priv)
2778{
2779        struct cma_work *work;
2780        union ib_gid gid;
2781        int ret;
2782
2783        work = kzalloc(sizeof *work, GFP_KERNEL);
2784        if (!work)
2785                return -ENOMEM;
2786
2787        if (!id_priv->cma_dev) {
2788                ret = cma_bind_loopback(id_priv);
2789                if (ret)
2790                        goto err;
2791        }
2792
2793        rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2794        rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
2795
2796        work->id = id_priv;
2797        INIT_WORK(&work->work, cma_work_handler);
2798        work->old_state = RDMA_CM_ADDR_QUERY;
2799        work->new_state = RDMA_CM_ADDR_RESOLVED;
2800        work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2801        queue_work(cma_wq, &work->work);
2802        return 0;
2803err:
2804        kfree(work);
2805        return ret;
2806}
2807
2808static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
2809{
2810        struct cma_work *work;
2811        int ret;
2812
2813        work = kzalloc(sizeof *work, GFP_KERNEL);
2814        if (!work)
2815                return -ENOMEM;
2816
2817        if (!id_priv->cma_dev) {
2818                ret = cma_resolve_ib_dev(id_priv);
2819                if (ret)
2820                        goto err;
2821        }
2822
2823        rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
2824                &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
2825
2826        work->id = id_priv;
2827        INIT_WORK(&work->work, cma_work_handler);
2828        work->old_state = RDMA_CM_ADDR_QUERY;
2829        work->new_state = RDMA_CM_ADDR_RESOLVED;
2830        work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2831        queue_work(cma_wq, &work->work);
2832        return 0;
2833err:
2834        kfree(work);
2835        return ret;
2836}
2837
2838static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2839                         struct sockaddr *dst_addr)
2840{
2841        if (!src_addr || !src_addr->sa_family) {
2842                src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2843                src_addr->sa_family = dst_addr->sa_family;
2844                if (IS_ENABLED(CONFIG_IPV6) &&
2845                    dst_addr->sa_family == AF_INET6) {
2846                        struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
2847                        struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
2848                        src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
2849                        if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
2850                                id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
2851                } else if (dst_addr->sa_family == AF_IB) {
2852                        ((struct sockaddr_ib *) src_addr)->sib_pkey =
2853                                ((struct sockaddr_ib *) dst_addr)->sib_pkey;
2854                }
2855        }
2856        return rdma_bind_addr(id, src_addr);
2857}
2858
2859int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2860                      struct sockaddr *dst_addr, int timeout_ms)
2861{
2862        struct rdma_id_private *id_priv;
2863        int ret;
2864
2865        id_priv = container_of(id, struct rdma_id_private, id);
2866        memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
2867        if (id_priv->state == RDMA_CM_IDLE) {
2868                ret = cma_bind_addr(id, src_addr, dst_addr);
2869                if (ret) {
2870                        memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
2871                        return ret;
2872                }
2873        }
2874
2875        if (cma_family(id_priv) != dst_addr->sa_family) {
2876                memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
2877                return -EINVAL;
2878        }
2879
2880        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
2881                memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
2882                return -EINVAL;
2883        }
2884
2885        atomic_inc(&id_priv->refcount);
2886        if (cma_any_addr(dst_addr)) {
2887                ret = cma_resolve_loopback(id_priv);
2888        } else {
2889                if (dst_addr->sa_family == AF_IB) {
2890                        ret = cma_resolve_ib_addr(id_priv);
2891                } else {
2892                        ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
2893                                              dst_addr, &id->route.addr.dev_addr,
2894                                              timeout_ms, addr_handler, id_priv);
2895                }
2896        }
2897        if (ret)
2898                goto err;
2899
2900        return 0;
2901err:
2902        cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
2903        cma_deref_id(id_priv);
2904        return ret;
2905}
2906EXPORT_SYMBOL(rdma_resolve_addr);
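    /*
     * Minimal sketch of a caller (illustrative, not part of this file):
     * fill a sockaddr_in with the peer's IP and port, leave src_addr NULL
     * to let the kernel bind a wildcard source of the same family, and
     * pass a timeout in milliseconds, e.g.
     *
     *	struct sockaddr_in dst = {
     *		.sin_family = AF_INET,
     *		.sin_port   = htons(5000),
     *		.sin_addr   = { .s_addr = in_aton("192.0.2.1") },
     *	};
     *	ret = rdma_resolve_addr(id, NULL, (struct sockaddr *)&dst, 2000);
     *
     * Completion is reported asynchronously through the id's event handler
     * as RDMA_CM_EVENT_ADDR_RESOLVED or RDMA_CM_EVENT_ADDR_ERROR.
     */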
2907
2908int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
2909{
2910        struct rdma_id_private *id_priv;
2911        unsigned long flags;
2912        int ret;
2913
2914        id_priv = container_of(id, struct rdma_id_private, id);
2915        spin_lock_irqsave(&id_priv->lock, flags);
2916        if (reuse || id_priv->state == RDMA_CM_IDLE) {
2917                id_priv->reuseaddr = reuse;
2918                ret = 0;
2919        } else {
2920                ret = -EINVAL;
2921        }
2922        spin_unlock_irqrestore(&id_priv->lock, flags);
2923        return ret;
2924}
2925EXPORT_SYMBOL(rdma_set_reuseaddr);
2926
2927int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
2928{
2929        struct rdma_id_private *id_priv;
2930        unsigned long flags;
2931        int ret;
2932
2933        id_priv = container_of(id, struct rdma_id_private, id);
2934        spin_lock_irqsave(&id_priv->lock, flags);
2935        if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
2936                id_priv->options |= (1 << CMA_OPTION_AFONLY);
2937                id_priv->afonly = afonly;
2938                ret = 0;
2939        } else {
2940                ret = -EINVAL;
2941        }
2942        spin_unlock_irqrestore(&id_priv->lock, flags);
2943        return ret;
2944}
2945EXPORT_SYMBOL(rdma_set_afonly);
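/*
 * A note on ordering for the two option setters above: rdma_set_reuseaddr()
 * can set the flag at any time but can only clear it while the id is still
 * RDMA_CM_IDLE, and rdma_set_afonly() is accepted only in RDMA_CM_IDLE or
 * RDMA_CM_ADDR_BOUND.  A hypothetical listener would therefore apply them
 * between rdma_create_id() and rdma_bind_addr()/rdma_listen(), e.g.
 * (fragments; id, src and ret set up by the caller):
 *
 *        ret = rdma_set_reuseaddr(id, 1);
 *        if (!ret)
 *                ret = rdma_set_afonly(id, 1);
 *        if (!ret)
 *                ret = rdma_bind_addr(id, (struct sockaddr *)&src);
 *        if (!ret)
 *                ret = rdma_listen(id, 16);
 */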
2946
2947static void cma_bind_port(struct rdma_bind_list *bind_list,
2948                          struct rdma_id_private *id_priv)
2949{
2950        struct sockaddr *addr;
2951        struct sockaddr_ib *sib;
2952        u64 sid, mask;
2953        __be16 port;
2954
2955        addr = cma_src_addr(id_priv);
2956        port = htons(bind_list->port);
2957
2958        switch (addr->sa_family) {
2959        case AF_INET:
2960                ((struct sockaddr_in *) addr)->sin_port = port;
2961                break;
2962        case AF_INET6:
2963                ((struct sockaddr_in6 *) addr)->sin6_port = port;
2964                break;
2965        case AF_IB:
2966                sib = (struct sockaddr_ib *) addr;
2967                sid = be64_to_cpu(sib->sib_sid);
2968                mask = be64_to_cpu(sib->sib_sid_mask);
2969                sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
2970                sib->sib_sid_mask = cpu_to_be64(~0ULL);
2971                break;
2972        }
2973        id_priv->bind_list = bind_list;
2974        hlist_add_head(&id_priv->node, &bind_list->owners);
2975}
2976
2977static int cma_alloc_port(enum rdma_port_space ps,
2978                          struct rdma_id_private *id_priv, unsigned short snum)
2979{
2980        struct rdma_bind_list *bind_list;
2981        int ret;
2982
2983        bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
2984        if (!bind_list)
2985                return -ENOMEM;
2986
2987        ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list,
2988                           snum);
2989        if (ret < 0)
2990                goto err;
2991
2992        bind_list->ps = ps;
2993        bind_list->port = (unsigned short)ret;
2994        cma_bind_port(bind_list, id_priv);
2995        return 0;
2996err:
2997        kfree(bind_list);
2998        return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
2999}
3000
3001static int cma_port_is_unique(struct rdma_bind_list *bind_list,
3002                              struct rdma_id_private *id_priv)
3003{
3004        struct rdma_id_private *cur_id;
3005        struct sockaddr  *daddr = cma_dst_addr(id_priv);
3006        struct sockaddr  *saddr = cma_src_addr(id_priv);
3007        __be16 dport = cma_port(daddr);
3008
3009        hlist_for_each_entry(cur_id, &bind_list->owners, node) {
3010                struct sockaddr  *cur_daddr = cma_dst_addr(cur_id);
3011                struct sockaddr  *cur_saddr = cma_src_addr(cur_id);
3012                __be16 cur_dport = cma_port(cur_daddr);
3013
3014                if (id_priv == cur_id)
3015                        continue;
3016
3017                /* different dest port -> unique */
3018                if (!cma_any_port(cur_daddr) &&
3019                    (dport != cur_dport))
3020                        continue;
3021
3022                /* different src address -> unique */
3023                if (!cma_any_addr(saddr) &&
3024                    !cma_any_addr(cur_saddr) &&
3025                    cma_addr_cmp(saddr, cur_saddr))
3026                        continue;
3027
3028                /* different dst address -> unique */
3029                if (!cma_any_addr(cur_daddr) &&
3030                    cma_addr_cmp(daddr, cur_daddr))
3031                        continue;
3032
3033                return -EADDRNOTAVAIL;
3034        }
3035        return 0;
3036}
3037
3038static int cma_alloc_any_port(enum rdma_port_space ps,
3039                              struct rdma_id_private *id_priv)
3040{
3041        static unsigned int last_used_port;
3042        int low, high, remaining;
3043        unsigned int rover;
3044        struct net *net = id_priv->id.route.addr.dev_addr.net;
3045
3046        inet_get_local_port_range(net, &low, &high);
3047        remaining = (high - low) + 1;
3048        rover = prandom_u32() % remaining + low;
3049retry:
3050        if (last_used_port != rover) {
3051                struct rdma_bind_list *bind_list;
3052                int ret;
3053
3054                bind_list = cma_ps_find(net, ps, (unsigned short)rover);
3055
3056                if (!bind_list) {
3057                        ret = cma_alloc_port(ps, id_priv, rover);
3058                } else {
3059                        ret = cma_port_is_unique(bind_list, id_priv);
3060                        if (!ret)
3061                                cma_bind_port(bind_list, id_priv);
3062                }
3063                /*
3064                 * Remember the previously used port number in order to avoid
3065                 * re-using the same port immediately after it is closed.
3066                 */
3067                if (!ret)
3068                        last_used_port = rover;
3069                if (ret != -EADDRNOTAVAIL)
3070                        return ret;
3071        }
3072        if (--remaining) {
3073                rover++;
3074                if ((rover < low) || (rover > high))
3075                        rover = low;
3076                goto retry;
3077        }
3078        return -EADDRNOTAVAIL;
3079}
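/*
 * Worked example of the rotation above, assuming the namespace's local port
 * range has the common default of 32768-60999 (the actual range comes from
 * inet_get_local_port_range()):
 *
 *        low = 32768, high = 60999  ->  remaining = 28232
 *        rover = prandom_u32() % 28232 + 32768, e.g. 59873
 *        candidates tried: 59873, 59874, ..., 60999, 32768, 32769, ...
 *
 * The walk stops at the first port that is unused (or shareable per
 * cma_port_is_unique()) and returns -EADDRNOTAVAIL only once the whole range
 * has been covered.
 */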
3080
3081/*
3082 * Check that the requested port is available.  This is called when trying to
3083 * bind to a specific port, or when trying to listen on a bound port.  In
3084 * the latter case, the provided id_priv may already be on the bind_list, but
3085 * we still need to check that it's okay to start listening.
3086 */
3087static int cma_check_port(struct rdma_bind_list *bind_list,
3088                          struct rdma_id_private *id_priv, uint8_t reuseaddr)
3089{
3090        struct rdma_id_private *cur_id;
3091        struct sockaddr *addr, *cur_addr;
3092
3093        addr = cma_src_addr(id_priv);
3094        hlist_for_each_entry(cur_id, &bind_list->owners, node) {
3095                if (id_priv == cur_id)
3096                        continue;
3097
3098                if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
3099                    cur_id->reuseaddr)
3100                        continue;
3101
3102                cur_addr = cma_src_addr(cur_id);
3103                if (id_priv->afonly && cur_id->afonly &&
3104                    (addr->sa_family != cur_addr->sa_family))
3105                        continue;
3106
3107                if (cma_any_addr(addr) || cma_any_addr(cur_addr))
3108                        return -EADDRNOTAVAIL;
3109
3110                if (!cma_addr_cmp(addr, cur_addr))
3111                        return -EADDRINUSE;
3112        }
3113        return 0;
3114}
3115
3116static int cma_use_port(enum rdma_port_space ps,
3117                        struct rdma_id_private *id_priv)
3118{
3119        struct rdma_bind_list *bind_list;
3120        unsigned short snum;
3121        int ret;
3122
3123        snum = ntohs(cma_port(cma_src_addr(id_priv)));
3124        if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
3125                return -EACCES;
3126
3127        bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum);
3128        if (!bind_list) {
3129                ret = cma_alloc_port(ps, id_priv, snum);
3130        } else {
3131                ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
3132                if (!ret)
3133                        cma_bind_port(bind_list, id_priv);
3134        }
3135        return ret;
3136}
3137
3138static int cma_bind_listen(struct rdma_id_private *id_priv)
3139{
3140        struct rdma_bind_list *bind_list = id_priv->bind_list;
3141        int ret = 0;
3142
3143        mutex_lock(&lock);
3144        if (bind_list->owners.first->next)
3145                ret = cma_check_port(bind_list, id_priv, 0);
3146        mutex_unlock(&lock);
3147        return ret;
3148}
3149
3150static enum rdma_port_space cma_select_inet_ps(
3151                struct rdma_id_private *id_priv)
3152{
3153        switch (id_priv->id.ps) {
3154        case RDMA_PS_TCP:
3155        case RDMA_PS_UDP:
3156        case RDMA_PS_IPOIB:
3157        case RDMA_PS_IB:
3158                return id_priv->id.ps;
3159        default:
3160
3161                return 0;
3162        }
3163}
3164
3165static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
3166{
3167        enum rdma_port_space ps = 0;
3168        struct sockaddr_ib *sib;
3169        u64 sid_ps, mask, sid;
3170
3171        sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
3172        mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
3173        sid = be64_to_cpu(sib->sib_sid) & mask;
3174
3175        if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
3176                sid_ps = RDMA_IB_IP_PS_IB;
3177                ps = RDMA_PS_IB;
3178        } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
3179                   (sid == (RDMA_IB_IP_PS_TCP & mask))) {
3180                sid_ps = RDMA_IB_IP_PS_TCP;
3181                ps = RDMA_PS_TCP;
3182        } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
3183                   (sid == (RDMA_IB_IP_PS_UDP & mask))) {
3184                sid_ps = RDMA_IB_IP_PS_UDP;
3185                ps = RDMA_PS_UDP;
3186        }
3187
3188        if (ps) {
3189                sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
3190                sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
3191                                                be64_to_cpu(sib->sib_sid_mask));
3192        }
3193        return ps;
3194}
3195
3196static int cma_get_port(struct rdma_id_private *id_priv)
3197{
3198        enum rdma_port_space ps;
3199        int ret;
3200
3201        if (cma_family(id_priv) != AF_IB)
3202                ps = cma_select_inet_ps(id_priv);
3203        else
3204                ps = cma_select_ib_ps(id_priv);
3205        if (!ps)
3206                return -EPROTONOSUPPORT;
3207
3208        mutex_lock(&lock);
3209        if (cma_any_port(cma_src_addr(id_priv)))
3210                ret = cma_alloc_any_port(ps, id_priv);
3211        else
3212                ret = cma_use_port(ps, id_priv);
3213        mutex_unlock(&lock);
3214
3215        return ret;
3216}
3217
3218static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
3219                               struct sockaddr *addr)
3220{
3221#if IS_ENABLED(CONFIG_IPV6)
3222        struct sockaddr_in6 *sin6;
3223
3224        if (addr->sa_family != AF_INET6)
3225                return 0;
3226
3227        sin6 = (struct sockaddr_in6 *) addr;
3228
3229        if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
3230                return 0;
3231
3232        if (!sin6->sin6_scope_id)
3233                return -EINVAL;
3234
3235        dev_addr->bound_dev_if = sin6->sin6_scope_id;
3236#endif
3237        return 0;
3238}
3239
3240int rdma_listen(struct rdma_cm_id *id, int backlog)
3241{
3242        struct rdma_id_private *id_priv;
3243        int ret;
3244
3245        id_priv = container_of(id, struct rdma_id_private, id);
3246        if (id_priv->state == RDMA_CM_IDLE) {
3247                id->route.addr.src_addr.ss_family = AF_INET;
3248                ret = rdma_bind_addr(id, cma_src_addr(id_priv));
3249                if (ret)
3250                        return ret;
3251        }
3252
3253        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
3254                return -EINVAL;
3255
3256        if (id_priv->reuseaddr) {
3257                ret = cma_bind_listen(id_priv);
3258                if (ret)
3259                        goto err;
3260        }
3261
3262        id_priv->backlog = backlog;
3263        if (id->device) {
3264                if (rdma_cap_ib_cm(id->device, 1)) {
3265                        ret = cma_ib_listen(id_priv);
3266                        if (ret)
3267                                goto err;
3268                } else if (rdma_cap_iw_cm(id->device, 1)) {
3269                        ret = cma_iw_listen(id_priv, backlog);
3270                        if (ret)
3271                                goto err;
3272                } else {
3273                        ret = -ENOSYS;
3274                        goto err;
3275                }
3276        } else
3277                cma_listen_on_all(id_priv);
3278
3279        return 0;
3280err:
3281        id_priv->backlog = 0;
3282        cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
3283        return ret;
3284}
3285EXPORT_SYMBOL(rdma_listen);
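/*
 * As the code above shows, calling rdma_listen() on an id that is still
 * RDMA_CM_IDLE implicitly binds it to the IPv4 wildcard address and an
 * ephemeral port first, so a minimal "listen anywhere" caller (illustrative
 * only) needs nothing more than:
 *
 *        ret = rdma_listen(id, 16);
 *
 * and can read the chosen address/port back from id->route.addr.src_addr.
 * Connection requests then arrive as RDMA_CM_EVENT_CONNECT_REQUEST events on
 * newly created child ids passed to the listener's event handler.
 */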
3286
3287int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
3288{
3289        struct rdma_id_private *id_priv;
3290        int ret;
3291        struct sockaddr  *daddr;
3292
3293        if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
3294            addr->sa_family != AF_IB)
3295                return -EAFNOSUPPORT;
3296
3297        id_priv = container_of(id, struct rdma_id_private, id);
3298        if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
3299                return -EINVAL;
3300
3301        ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
3302        if (ret)
3303                goto err1;
3304
3305        memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
3306        if (!cma_any_addr(addr)) {
3307                ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
3308                if (ret)
3309                        goto err1;
3310
3311                ret = cma_acquire_dev(id_priv, NULL);
3312                if (ret)
3313                        goto err1;
3314        }
3315
3316        if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
3317                if (addr->sa_family == AF_INET)
3318                        id_priv->afonly = 1;
3319#if IS_ENABLED(CONFIG_IPV6)
3320                else if (addr->sa_family == AF_INET6) {
3321                        struct net *net = id_priv->id.route.addr.dev_addr.net;
3322
3323                        id_priv->afonly = net->ipv6.sysctl.bindv6only;
3324                }
3325#endif
3326        }
3327        ret = cma_get_port(id_priv);
3328        if (ret)
3329                goto err2;
3330
3331        daddr = cma_dst_addr(id_priv);
3332        daddr->sa_family = addr->sa_family;
3333
3334        return 0;
3335err2:
3336        if (id_priv->cma_dev)
3337                cma_release_dev(id_priv);
3338err1:
3339        cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
3340        return ret;
3341}
3342EXPORT_SYMBOL(rdma_bind_addr);
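/*
 * Passive-side usage sketch combining rdma_bind_addr() and rdma_listen();
 * illustrative only, with a hypothetical handler name and port number.
 *
 *        struct sockaddr_in src = {
 *                .sin_family = AF_INET,
 *                .sin_port   = htons(7471),
 *        };
 *        struct rdma_cm_id *listen_id;
 *        int ret;
 *
 *        listen_id = rdma_create_id(&init_net, example_listen_handler, NULL,
 *                                   RDMA_PS_TCP, IB_QPT_RC);
 *        if (IS_ERR(listen_id))
 *                return PTR_ERR(listen_id);
 *
 *        ret = rdma_bind_addr(listen_id, (struct sockaddr *)&src);
 *        if (!ret)
 *                ret = rdma_listen(listen_id, 16);
 *        if (ret)
 *                rdma_destroy_id(listen_id);
 */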
3343
3344static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
3345{
3346        struct cma_hdr *cma_hdr;
3347
3348        cma_hdr = hdr;
3349        cma_hdr->cma_version = CMA_VERSION;
3350        if (cma_family(id_priv) == AF_INET) {
3351                struct sockaddr_in *src4, *dst4;
3352
3353                src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
3354                dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
3355
3356                cma_set_ip_ver(cma_hdr, 4);
3357                cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
3358                cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
3359                cma_hdr->port = src4->sin_port;
3360        } else if (cma_family(id_priv) == AF_INET6) {
3361                struct sockaddr_in6 *src6, *dst6;
3362
3363                src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
3364                dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
3365
3366                cma_set_ip_ver(cma_hdr, 6);
3367                cma_hdr->src_addr.ip6 = src6->sin6_addr;
3368                cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
3369                cma_hdr->port = src6->sin6_port;
3370        }
3371        return 0;
3372}
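/*
 * For AF_INET/AF_INET6 ids, the header written above is what
 * cma_resolve_ib_udp() and cma_connect_ib() below prepend to the caller's
 * private data, so the REQ/SIDR_REQ private data is laid out roughly as:
 *
 *        | struct cma_hdr: version, ip_ver, src port, src/dst addresses |
 *        | caller's private data, starting at cma_user_data_offset()    |
 *
 * AF_IB ids skip the header entirely (cma_user_data_offset() is 0 there).
 */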
3373
3374static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
3375                                struct ib_cm_event *ib_event)
3376{
3377        struct rdma_id_private *id_priv = cm_id->context;
3378        struct rdma_cm_event event;
3379        struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
3380        int ret = 0;
3381
3382        mutex_lock(&id_priv->handler_mutex);
3383        if (id_priv->state != RDMA_CM_CONNECT)
3384                goto out;
3385
3386        memset(&event, 0, sizeof event);
3387        switch (ib_event->event) {
3388        case IB_CM_SIDR_REQ_ERROR:
3389                event.event = RDMA_CM_EVENT_UNREACHABLE;
3390                event.status = -ETIMEDOUT;
3391                break;
3392        case IB_CM_SIDR_REP_RECEIVED:
3393                event.param.ud.private_data = ib_event->private_data;
3394                event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
3395                if (rep->status != IB_SIDR_SUCCESS) {
3396                        event.event = RDMA_CM_EVENT_UNREACHABLE;
3397                        event.status = ib_event->param.sidr_rep_rcvd.status;
3398                        pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n",
3399                                             event.status);
3400                        break;
3401                }
3402                ret = cma_set_qkey(id_priv, rep->qkey);
3403                if (ret) {
3404                        pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret);
3405                        event.event = RDMA_CM_EVENT_ADDR_ERROR;
3406                        event.status = ret;
3407                        break;
3408                }
3409                ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
3410                                     id_priv->id.route.path_rec,
3411                                     &event.param.ud.ah_attr);
3412                event.param.ud.qp_num = rep->qpn;
3413                event.param.ud.qkey = rep->qkey;
3414                event.event = RDMA_CM_EVENT_ESTABLISHED;
3415                event.status = 0;
3416                break;
3417        default:
3418                pr_err("RDMA CMA: unexpected IB CM event: %d\n",
3419                       ib_event->event);
3420                goto out;
3421        }
3422
3423        ret = id_priv->id.event_handler(&id_priv->id, &event);
3424        if (ret) {
3425                /* Destroy the CM ID by returning a non-zero value. */
3426                id_priv->cm_id.ib = NULL;
3427                cma_exch(id_priv, RDMA_CM_DESTROYING);
3428                mutex_unlock(&id_priv->handler_mutex);
3429                rdma_destroy_id(&id_priv->id);
3430                return ret;
3431        }
3432out:
3433        mutex_unlock(&id_priv->handler_mutex);
3434        return ret;
3435}
3436
3437static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
3438                              struct rdma_conn_param *conn_param)
3439{
3440        struct ib_cm_sidr_req_param req;
3441        struct ib_cm_id *id;
3442        void *private_data;
3443        int offset, ret;
3444
3445        memset(&req, 0, sizeof req);
3446        offset = cma_user_data_offset(id_priv);
3447        req.private_data_len = offset + conn_param->private_data_len;
3448        if (req.private_data_len < conn_param->private_data_len)
3449                return -EINVAL;
3450
3451        if (req.private_data_len) {
3452                private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
3453                if (!private_data)
3454                        return -ENOMEM;
3455        } else {
3456                private_data = NULL;
3457        }
3458
3459        if (conn_param->private_data && conn_param->private_data_len)
3460                memcpy(private_data + offset, conn_param->private_data,
3461                       conn_param->private_data_len);
3462
3463        if (private_data) {
3464                ret = cma_format_hdr(private_data, id_priv);
3465                if (ret)
3466                        goto out;
3467                req.private_data = private_data;
3468        }
3469
3470        id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
3471                             id_priv);
3472        if (IS_ERR(id)) {
3473                ret = PTR_ERR(id);
3474                goto out;
3475        }
3476        id_priv->cm_id.ib = id;
3477
3478        req.path = id_priv->id.route.path_rec;
3479        req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3480        req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
3481        req.max_cm_retries = CMA_MAX_CM_RETRIES;
3482
3483        ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
3484        if (ret) {
3485                ib_destroy_cm_id(id_priv->cm_id.ib);
3486                id_priv->cm_id.ib = NULL;
3487        }
3488out:
3489        kfree(private_data);
3490        return ret;
3491}
3492
3493static int cma_connect_ib(struct rdma_id_private *id_priv,
3494                          struct rdma_conn_param *conn_param)
3495{
3496        struct ib_cm_req_param req;
3497        struct rdma_route *route;
3498        void *private_data;
3499        struct ib_cm_id *id;
3500        int offset, ret;
3501
3502        memset(&req, 0, sizeof req);
3503        offset = cma_user_data_offset(id_priv);
3504        req.private_data_len = offset + conn_param->private_data_len;
3505        if (req.private_data_len < conn_param->private_data_len)
3506                return -EINVAL;
3507
3508        if (req.private_data_len) {
3509                private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
3510                if (!private_data)
3511                        return -ENOMEM;
3512        } else {
3513                private_data = NULL;
3514        }
3515
3516        if (conn_param->private_data && conn_param->private_data_len)
3517                memcpy(private_data + offset, conn_param->private_data,
3518                       conn_param->private_data_len);
3519
3520        id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv);
3521        if (IS_ERR(id)) {
3522                ret = PTR_ERR(id);
3523                goto out;
3524        }
3525        id_priv->cm_id.ib = id;
3526
3527        route = &id_priv->id.route;
3528        if (private_data) {
3529                ret = cma_format_hdr(private_data, id_priv);
3530                if (ret)
3531                        goto out;
3532                req.private_data = private_data;
3533        }
3534
3535        req.primary_path = &route->path_rec[0];
3536        if (route->num_paths == 2)
3537                req.alternate_path = &route->path_rec[1];
3538
3539        req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3540        req.qp_num = id_priv->qp_num;
3541        req.qp_type = id_priv->id.qp_type;
3542        req.starting_psn = id_priv->seq_num;
3543        req.responder_resources = conn_param->responder_resources;
3544        req.initiator_depth = conn_param->initiator_depth;
3545        req.flow_control = conn_param->flow_control;
3546        req.retry_count = min_t(u8, 7, conn_param->retry_count);
3547        req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
3548        req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
3549        req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
3550        req.max_cm_retries = CMA_MAX_CM_RETRIES;
3551        req.srq = id_priv->srq ? 1 : 0;
3552
3553        ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
3554out:
3555        if (ret && !IS_ERR(id)) {
3556                ib_destroy_cm_id(id);
3557                id_priv->cm_id.ib = NULL;
3558        }
3559
3560        kfree(private_data);
3561        return ret;
3562}
3563
3564static int cma_connect_iw(struct rdma_id_private *id_priv,
3565                          struct rdma_conn_param *conn_param)
3566{
3567        struct iw_cm_id *cm_id;
3568        int ret;
3569        struct iw_cm_conn_param iw_param;
3570
3571        cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
3572        if (IS_ERR(cm_id))
3573                return PTR_ERR(cm_id);
3574
3575        cm_id->tos = id_priv->tos;
3576        id_priv->cm_id.iw = cm_id;
3577
3578        memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
3579               rdma_addr_size(cma_src_addr(id_priv)));
3580        memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv),
3581               rdma_addr_size(cma_dst_addr(id_priv)));
3582
3583        ret = cma_modify_qp_rtr(id_priv, conn_param);
3584        if (ret)
3585                goto out;
3586
3587        if (conn_param) {
3588                iw_param.ord = conn_param->initiator_depth;
3589                iw_param.ird = conn_param->responder_resources;
3590                iw_param.private_data = conn_param->private_data;
3591                iw_param.private_data_len = conn_param->private_data_len;
3592                iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
3593        } else {
3594                memset(&iw_param, 0, sizeof iw_param);
3595                iw_param.qpn = id_priv->qp_num;
3596        }
3597        ret = iw_cm_connect(cm_id, &iw_param);
3598out:
3599        if (ret) {
3600                iw_destroy_cm_id(cm_id);
3601                id_priv->cm_id.iw = NULL;
3602        }
3603        return ret;
3604}
3605
3606int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
3607{
3608        struct rdma_id_private *id_priv;
3609        int ret;
3610
3611        id_priv = container_of(id, struct rdma_id_private, id);
3612        if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
3613                return -EINVAL;
3614
3615        if (!id->qp) {
3616                id_priv->qp_num = conn_param->qp_num;
3617                id_priv->srq = conn_param->srq;
3618        }
3619
3620        if (rdma_cap_ib_cm(id->device, id->port_num)) {
3621                if (id->qp_type == IB_QPT_UD)
3622                        ret = cma_resolve_ib_udp(id_priv, conn_param);
3623                else
3624                        ret = cma_connect_ib(id_priv, conn_param);
3625        } else if (rdma_cap_iw_cm(id->device, id->port_num))
3626                ret = cma_connect_iw(id_priv, conn_param);
3627        else
3628                ret = -ENOSYS;
3629        if (ret)
3630                goto err;
3631
3632        return 0;
3633err:
3634        cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
3635        return ret;
3636}
3637EXPORT_SYMBOL(rdma_connect);
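/*
 * Active-side usage sketch for rdma_connect(); an illustrative fragment from
 * a hypothetical event handler, invoked once the id has reached
 * RDMA_CM_ROUTE_RESOLVED and a QP has been created on it (e.g. with
 * rdma_create_qp()).  The parameter values are examples, not requirements.
 *
 *        static int example_cm_handler(struct rdma_cm_id *id,
 *                                      struct rdma_cm_event *event)
 *        {
 *                struct rdma_conn_param param = {
 *                        .responder_resources = 1,
 *                        .initiator_depth     = 1,
 *                        .retry_count         = 7,
 *                        .rnr_retry_count     = 7,
 *                };
 *
 *                if (event->event == RDMA_CM_EVENT_ROUTE_RESOLVED)
 *                        return rdma_connect(id, &param);
 *                return 0;
 *        }
 */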
3638
3639static int cma_accept_ib(struct rdma_id_private *id_priv,
3640                         struct rdma_conn_param *conn_param)
3641{
3642        struct ib_cm_rep_param rep;
3643        int ret;
3644
3645        ret = cma_modify_qp_rtr(id_priv, conn_param);
3646        if (ret)
3647                goto out;
3648
3649        ret = cma_modify_qp_rts(id_priv, conn_param);
3650        if (ret)
3651                goto out;
3652
3653        memset(&rep, 0, sizeof rep);
3654        rep.qp_num = id_priv->qp_num;
3655        rep.starting_psn = id_priv->seq_num;
3656        rep.private_data = conn_param->private_data;
3657        rep.private_data_len = conn_param->private_data_len;
3658        rep.responder_resources = conn_param->responder_resources;
3659        rep.initiator_depth = conn_param->initiator_depth;
3660        rep.failover_accepted = 0;
3661        rep.flow_control = conn_param->flow_control;
3662        rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
3663        rep.srq = id_priv->srq ? 1 : 0;
3664
3665        ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
3666out:
3667        return ret;
3668}
3669
3670static int cma_accept_iw(struct rdma_id_private *id_priv,
3671                  struct rdma_conn_param *conn_param)
3672{
3673        struct iw_cm_conn_param iw_param;
3674        int ret;
3675
3676        if (!conn_param)
3677                return -EINVAL;
3678
3679        ret = cma_modify_qp_rtr(id_priv, conn_param);
3680        if (ret)
3681                return ret;
3682
3683        iw_param.ord = conn_param->initiator_depth;
3684        iw_param.ird = conn_param->responder_resources;
3685        iw_param.private_data = conn_param->private_data;
3686        iw_param.private_data_len = conn_param->private_data_len;
3687        if (id_priv->id.qp) {
3688                iw_param.qpn = id_priv->qp_num;
3689        } else
3690                iw_param.qpn = conn_param->qp_num;
3691
3692        return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
3693}
3694
3695static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
3696                             enum ib_cm_sidr_status status, u32 qkey,
3697                             const void *private_data, int private_data_len)
3698{
3699        struct ib_cm_sidr_rep_param rep;
3700        int ret;
3701
3702        memset(&rep, 0, sizeof rep);
3703        rep.status = status;
3704        if (status == IB_SIDR_SUCCESS) {
3705                ret = cma_set_qkey(id_priv, qkey);
3706                if (ret)
3707                        return ret;
3708                rep.qp_num = id_priv->qp_num;
3709                rep.qkey = id_priv->qkey;
3710        }
3711        rep.private_data = private_data;
3712        rep.private_data_len = private_data_len;
3713
3714        return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
3715}
3716
3717int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
3718{
3719        struct rdma_id_private *id_priv;
3720        int ret;
3721
3722        id_priv = container_of(id, struct rdma_id_private, id);
3723
3724        id_priv->owner = task_pid_nr(current);
3725
3726        if (!cma_comp(id_priv, RDMA_CM_CONNECT))
3727                return -EINVAL;
3728
3729        if (!id->qp && conn_param) {
3730                id_priv->qp_num = conn_param->qp_num;
3731                id_priv->srq = conn_param->srq;
3732        }
3733
3734        if (rdma_cap_ib_cm(id->device, id->port_num)) {
3735                if (id->qp_type == IB_QPT_UD) {
3736                        if (conn_param)
3737                                ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
3738                                                        conn_param->qkey,
3739                                                        conn_param->private_data,
3740                                                        conn_param->private_data_len);
3741                        else
3742                                ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
3743                                                        0, NULL, 0);
3744                } else {
3745                        if (conn_param)
3746                                ret = cma_accept_ib(id_priv, conn_param);
3747                        else
3748                                ret = cma_rep_recv(id_priv);
3749                }
3750        } else if (rdma_cap_iw_cm(id->device, id->port_num))
3751                ret = cma_accept_iw(id_priv, conn_param);
3752        else
3753                ret = -ENOSYS;
3754
3755        if (ret)
3756                goto reject;
3757
3758        return 0;
3759reject:
3760        cma_modify_qp_err(id_priv);
3761        rdma_reject(id, NULL, 0);
3762        return ret;
3763}
3764EXPORT_SYMBOL(rdma_accept);
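/*
 * Server-side usage sketch for rdma_accept(); an illustrative fragment from a
 * hypothetical listener event handler.  The child id delivered with
 * RDMA_CM_EVENT_CONNECT_REQUEST must still be in RDMA_CM_CONNECT when
 * rdma_accept() is called (typically from within that handler), and a QP is
 * assumed to have been created on it first.  Declining the request instead is
 * done with rdma_reject(), shown below.
 *
 *        case RDMA_CM_EVENT_CONNECT_REQUEST: {
 *                struct rdma_conn_param param = {
 *                        .responder_resources =
 *                                event->param.conn.responder_resources,
 *                        .initiator_depth =
 *                                event->param.conn.initiator_depth,
 *                        .rnr_retry_count = 7,
 *                };
 *
 *                return rdma_accept(id, &param);
 *        }
 */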
3765
3766int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
3767{
3768        struct rdma_id_private *id_priv;
3769        int ret;
3770
3771        id_priv = container_of(id, struct rdma_id_private, id);
3772        if (!id_priv->cm_id.ib)
3773                return -EINVAL;
3774
3775        switch (id->device->node_type) {
3776        case RDMA_NODE_IB_CA:
3777                ret = ib_cm_notify(id_priv->cm_id.ib, event);
3778                break;
3779        default:
3780                ret = 0;
3781                break;
3782        }
3783        return ret;
3784}
3785EXPORT_SYMBOL(rdma_notify);
3786
3787int rdma_reject(struct rdma_cm_id *id, const void *private_data,
3788                u8 private_data_len)
3789{
3790        struct rdma_id_private *id_priv;
3791        int ret;
3792
3793        id_priv = container_of(id, struct rdma_id_private, id);
3794        if (!id_priv->cm_id.ib)
3795                return -EINVAL;
3796
3797        if (rdma_cap_ib_cm(id->device, id->port_num)) {
3798                if (id->qp_type == IB_QPT_UD)
3799                        ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
3800                                                private_data, private_data_len);
3801                else
3802                        ret = ib_send_cm_rej(id_priv->cm_id.ib,
3803                                             IB_CM_REJ_CONSUMER_DEFINED, NULL,
3804                                             0, private_data, private_data_len);
3805        } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
3806                ret = iw_cm_reject(id_priv->cm_id.iw,
3807                                   private_data, private_data_len);
3808        } else
3809                ret = -ENOSYS;
3810
3811        return ret;
3812}
3813EXPORT_SYMBOL(rdma_reject);
3814
3815int rdma_disconnect(struct rdma_cm_id *id)
3816{
3817        struct rdma_id_private *id_priv;
3818        int ret;
3819
3820        id_priv = container_of(id, struct rdma_id_private, id);
3821        if (!id_priv->cm_id.ib)
3822                return -EINVAL;
3823
3824        if (rdma_cap_ib_cm(id->device, id->port_num)) {
3825                ret = cma_modify_qp_err(id_priv);
3826                if (ret)
3827                        goto out;
3828                /* Initiate or respond to a disconnect. */
3829                if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
3830                        ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
3831        } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
3832                ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
3833        } else
3834                ret = -EINVAL;
3835
3836out:
3837        return ret;
3838}
3839EXPORT_SYMBOL(rdma_disconnect);
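/*
 * Teardown sketch for a connected id; illustrative only.  rdma_disconnect()
 * initiates (or answers) the DREQ/DREP exchange on IB or an iWARP disconnect;
 * RDMA_CM_EVENT_DISCONNECTED is then reported to both sides' event handlers,
 * after which the QP and the id can be torn down, e.g.:
 *
 *        rdma_disconnect(id);
 *        ... wait for/handle RDMA_CM_EVENT_DISCONNECTED ...
 *        rdma_destroy_qp(id);
 *        rdma_destroy_id(id);
 *
 * Note that rdma_destroy_id() must not be called from the id's own event
 * handler; returning non-zero from the handler asks the CM to destroy the id
 * instead.
 */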
3840
3841static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
3842{
3843        struct rdma_id_private *id_priv;
3844        struct cma_multicast *mc = multicast->context;
3845        struct rdma_cm_event event;
3846        int ret = 0;
3847
3848        id_priv = mc->id_priv;
3849        mutex_lock(&id_priv->handler_mutex);
3850        if (id_priv->state != RDMA_CM_ADDR_BOUND &&
3851            id_priv->state != RDMA_CM_ADDR_RESOLVED)
3852                goto out;
3853
3854        if (!status)
3855                status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
3856        else
3857                pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
3858                                     status);
3859        mutex_lock(&id_priv->qp_mutex);
3860        if (!status && id_priv->id.qp) {
3861                status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
3862                                         be16_to_cpu(multicast->rec.mlid));
3863                if (status)
3864                        pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
3865                                             status);
3866        }
3867        mutex_unlock(&id_priv->qp_mutex);
3868
3869        memset(&event, 0, sizeof event);
3870        event.status = status;
3871        event.param.ud.private_data = mc->context;
3872        if (!status) {
3873                struct rdma_dev_addr *dev_addr =
3874                        &id_priv->id.route.addr.dev_addr;
3875                struct net_device *ndev =
3876                        dev_get_by_index(&init_net, dev_addr->bound_dev_if);
3877                enum ib_gid_type gid_type =
3878                        id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
3879                        rdma_start_port(id_priv->cma_dev->device)];
3880
3881                event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
3882                ib_init_ah_from_mcmember(id_priv->id.device,
3883                                         id_priv->id.port_num, &multicast->rec,
3884                                         ndev, gid_type,
3885                                         &event.param.ud.ah_attr);
3886                event.param.ud.qp_num = 0xFFFFFF;
3887                event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
3888                if (ndev)
3889                        dev_put(ndev);
3890        } else
3891                event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
3892
3893        ret = id_priv->id.event_handler(&id_priv->id, &event);
3894        if (ret) {
3895                cma_exch(id_priv, RDMA_CM_DESTROYING);
3896                mutex_unlock(&id_priv->handler_mutex);
3897                rdma_destroy_id(&id_priv->id);
3898                return 0;
3899        }
3900
3901out:
3902        mutex_unlock(&id_priv->handler_mutex);
3903        return 0;
3904}
3905
3906static void cma_set_mgid(struct rdma_id_private *id_priv,
3907                         struct sockaddr *addr, union ib_gid *mgid)
3908{
3909        unsigned char mc_map[MAX_ADDR_LEN];
3910        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3911        struct sockaddr_in *sin = (struct sockaddr_in *) addr;
3912        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
3913
3914        if (cma_any_addr(addr)) {
3915                memset(mgid, 0, sizeof *mgid);
3916        } else if ((addr->sa_family == AF_INET6) &&
3917                   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
3918                                                                 0xFF10A01B)) {
3919                /* IPv6 address is an SA assigned MGID. */
3920                memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3921        } else if (addr->sa_family == AF_IB) {
3922                memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
3923        } else if (addr->sa_family == AF_INET6) {
3924                ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
3925                if (id_priv->id.ps == RDMA_PS_UDP)
3926                        mc_map[7] = 0x01;       /* Use RDMA CM signature */
3927                *mgid = *(union ib_gid *) (mc_map + 4);
3928        } else {
3929                ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
3930                if (id_priv->id.ps == RDMA_PS_UDP)
3931                        mc_map[7] = 0x01;       /* Use RDMA CM signature */
3932                *mgid = *(union ib_gid *) (mc_map + 4);
3933        }
3934}
3935
3936static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3937                                 struct cma_multicast *mc)
3938{
3939        struct ib_sa_mcmember_rec rec;
3940        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3941        ib_sa_comp_mask comp_mask;
3942        int ret;
3943
3944        ib_addr_get_mgid(dev_addr, &rec.mgid);
3945        ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
3946                                     &rec.mgid, &rec);
3947        if (ret)
3948                return ret;
3949
3950        ret = cma_set_qkey(id_priv, 0);
3951        if (ret)
3952                return ret;
3953
3954        cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
3955        rec.qkey = cpu_to_be32(id_priv->qkey);
3956        rdma_addr_get_sgid(dev_addr, &rec.port_gid);
3957        rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
3958        rec.join_state = mc->join_state;
3959
3960        if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
3961            (!ib_sa_sendonly_fullmem_support(&sa_client,
3962                                             id_priv->id.device,
3963                                             id_priv->id.port_num))) {
3964                pr_warn("RDMA CM: %s port %u Unable to join multicast\n"
3965                        "RDMA CM: SM doesn't support Send Only Full Member option\n",
3966                        id_priv->id.device->name, id_priv->id.port_num);
3967                return -EOPNOTSUPP;
3968        }
3969
3970        comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
3971                    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
3972                    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
3973                    IB_SA_MCMEMBER_REC_FLOW_LABEL |
3974                    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
3975
3976        if (id_priv->id.ps == RDMA_PS_IPOIB)
3977                comp_mask |= IB_SA_MCMEMBER_REC_RATE |
3978                             IB_SA_MCMEMBER_REC_RATE_SELECTOR |
3979                             IB_SA_MCMEMBER_REC_MTU_SELECTOR |
3980                             IB_SA_MCMEMBER_REC_MTU |
3981                             IB_SA_MCMEMBER_REC_HOP_LIMIT;
3982
3983        mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
3984                                                id_priv->id.port_num, &rec,
3985                                                comp_mask, GFP_KERNEL,
3986                                                cma_ib_mc_handler, mc);
3987        return PTR_ERR_OR_ZERO(mc->multicast.ib);
3988}
3989
3990static void iboe_mcast_work_handler(struct work_struct *work)
3991{
3992        struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
3993        struct cma_multicast *mc = mw->mc;
3994        struct ib_sa_multicast *m = mc->multicast.ib;
3995
3996        mc->multicast.ib->context = mc;
3997        cma_ib_mc_handler(0, m);
3998        kref_put(&mc->mcref, release_mc);
3999        kfree(mw);
4000}
4001
4002static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
4003                              enum ib_gid_type gid_type)
4004{
4005        struct sockaddr_in *sin = (struct sockaddr_in *)addr;
4006        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
4007
4008        if (cma_any_addr(addr)) {
4009                memset(mgid, 0, sizeof *mgid);
4010        } else if (addr->sa_family == AF_INET6) {
4011                memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
4012        } else {
4013                mgid->raw[0] = (gid_type == IB_GID_TYPE_IB) ? 0xff : 0;
4014                mgid->raw[1] = (gid_type == IB_GID_TYPE_IB) ? 0x0e : 0;
4015                mgid->raw[2] = 0;
4016                mgid->raw[3] = 0;
4017                mgid->raw[4] = 0;
4018                mgid->raw[5] = 0;
4019                mgid->raw[6] = 0;
4020                mgid->raw[7] = 0;
4021                mgid->raw[8] = 0;
4022                mgid->raw[9] = 0;
4023                mgid->raw[10] = 0xff;
4024                mgid->raw[11] = 0xff;
4025                *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
4026        }
4027}
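/*
 * Worked example of the IPv4 branch above, using the (hypothetical) group
 * address 239.1.2.3: with gid_type == IB_GID_TYPE_IB the resulting MGID is
 * ff0e:0000:0000:0000:0000:ffff:ef01:0203, while for RoCE v2
 * (IB_GID_TYPE_ROCE_UDP_ENCAP) the leading ff0e bytes are zero, giving
 * ::ffff:239.1.2.3, i.e. the IPv4-mapped IPv6 form of the group address.
 */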
4028
4029static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
4030                                   struct cma_multicast *mc)
4031{
4032        struct iboe_mcast_work *work;
4033        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
4034        int err = 0;
4035        struct sockaddr *addr = (struct sockaddr *)&mc->addr;
4036        struct net_device *ndev = NULL;
4037        enum ib_gid_type gid_type;
4038        bool send_only;
4039
4040        send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
4041
4042        if (cma_zero_addr((struct sockaddr *)&mc->addr))
4043                return -EINVAL;
4044
4045        work = kzalloc(sizeof *work, GFP_KERNEL);
4046        if (!work)
4047                return -ENOMEM;
4048
4049        mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
4050        if (!mc->multicast.ib) {
4051                err = -ENOMEM;
4052                goto out1;
4053        }
4054
4055        gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
4056                   rdma_start_port(id_priv->cma_dev->device)];
4057        cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
4058
4059        mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
4060        if (id_priv->id.ps == RDMA_PS_UDP)
4061                mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
4062
4063        if (dev_addr->bound_dev_if)
4064                ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
4065        if (!ndev) {
4066                err = -ENODEV;
4067                goto out2;
4068        }
4069        mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
4070        mc->multicast.ib->rec.hop_limit = 1;
4071        mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
4072
4073        if (addr->sa_family == AF_INET) {
4074                if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
4075                        mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
4076                        if (!send_only) {
4077                                err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
4078                                                    true);
4079                                if (!err)
4080                                        mc->igmp_joined = true;
4081                        }
4082                }
4083        } else {
4084                if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
4085                        err = -ENOTSUPP;
4086        }
4087        dev_put(ndev);
4088        if (err || !mc->multicast.ib->rec.mtu) {
4089                if (!err)
4090                        err = -EINVAL;
4091                goto out2;
4092        }
4093        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
4094                    &mc->multicast.ib->rec.port_gid);
4095        work->id = id_priv;
4096        work->mc = mc;
4097        INIT_WORK(&work->work, iboe_mcast_work_handler);
4098        kref_get(&mc->mcref);
4099        queue_work(cma_wq, &work->work);
4100
4101        return 0;
4102
4103out2:
4104        kfree(mc->multicast.ib);
4105out1:
4106        kfree(work);
4107        return err;
4108}
4109
4110int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
4111                        u8 join_state, void *context)
4112{
4113        struct rdma_id_private *id_priv;
4114        struct cma_multicast *mc;
4115        int ret;
4116
4117        id_priv = container_of(id, struct rdma_id_private, id);
4118        if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
4119            !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
4120                return -EINVAL;
4121
4122        mc = kmalloc(sizeof *mc, GFP_KERNEL);
4123        if (!mc)
4124                return -ENOMEM;
4125
4126        memcpy(&mc->addr, addr, rdma_addr_size(addr));
4127        mc->context = context;
4128        mc->id_priv = id_priv;
4129        mc->igmp_joined = false;
4130        mc->join_state = join_state;
4131        spin_lock(&id_priv->lock);
4132        list_add(&mc->list, &id_priv->mc_list);
4133        spin_unlock(&id_priv->lock);
4134
4135        if (rdma_protocol_roce(id->device, id->port_num)) {
4136                kref_init(&mc->mcref);
4137                ret = cma_iboe_join_multicast(id_priv, mc);
4138        } else if (rdma_cap_ib_mcast(id->device, id->port_num))
4139                ret = cma_join_ib_multicast(id_priv, mc);
4140        else
4141                ret = -ENOSYS;
4142
4143        if (ret) {
4144                spin_lock_irq(&id_priv->lock);
4145                list_del(&mc->list);
4146                spin_unlock_irq(&id_priv->lock);
4147                kfree(mc);
4148        }
4149        return ret;
4150}
4151EXPORT_SYMBOL(rdma_join_multicast);
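/*
 * Usage sketch for rdma_join_multicast(); illustrative only, with a
 * hypothetical group address and context, and assuming FULLMEMBER_JOIN from
 * the ib_sa join-state enum for an ordinary full-member join.  The id must be
 * in RDMA_CM_ADDR_BOUND or RDMA_CM_ADDR_RESOLVED; completion is reported as
 * RDMA_CM_EVENT_MULTICAST_JOIN (or _ERROR), whose param.ud fields carry the
 * AH attributes, QPN (0xFFFFFF) and Q_Key to use when sending to the group.
 *
 *        struct sockaddr_in grp = {
 *                .sin_family      = AF_INET,
 *                .sin_addr.s_addr = in_aton("239.1.2.3"),
 *        };
 *
 *        ret = rdma_join_multicast(id, (struct sockaddr *)&grp,
 *                                  BIT(FULLMEMBER_JOIN), example_ctx);
 *
 * The matching rdma_leave_multicast() below takes the same address.
 */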
4152
4153void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
4154{
4155        struct rdma_id_private *id_priv;
4156        struct cma_multicast *mc;
4157
4158        id_priv = container_of(id, struct rdma_id_private, id);
4159        spin_lock_irq(&id_priv->lock);
4160        list_for_each_entry(mc, &id_priv->mc_list, list) {
4161                if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
4162                        list_del(&mc->list);
4163                        spin_unlock_irq(&id_priv->lock);
4164
4165                        if (id->qp)
4166                                ib_detach_mcast(id->qp,
4167                                                &mc->multicast.ib->rec.mgid,
4168                                                be16_to_cpu(mc->multicast.ib->rec.mlid));
4169
4170                        BUG_ON(id_priv->cma_dev->device != id->device);
4171
4172                        if (rdma_cap_ib_mcast(id->device, id->port_num)) {
4173                                ib_sa_free_multicast(mc->multicast.ib);
4174                                kfree(mc);
4175                        } else if (rdma_protocol_roce(id->device, id->port_num)) {
4176                                if (mc->igmp_joined) {
4177                                        struct rdma_dev_addr *dev_addr =
4178                                                &id->route.addr.dev_addr;
4179                                        struct net_device *ndev = NULL;
4180
4181                                        if (dev_addr->bound_dev_if)
4182                                                ndev = dev_get_by_index(&init_net,
4183                                                                        dev_addr->bound_dev_if);
4184                                        if (ndev) {
4185                                                cma_igmp_send(ndev,
4186                                                              &mc->multicast.ib->rec.mgid,
4187                                                              false);
4188                                                dev_put(ndev);
4189                                        }
4190                                        mc->igmp_joined = false;
4191                                }
4192                                kref_put(&mc->mcref, release_mc);
4193                        }
4194                        return;
4195                }
4196        }
4197        spin_unlock_irq(&id_priv->lock);
4198}
4199EXPORT_SYMBOL(rdma_leave_multicast);
4200
4201static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
4202{
4203        struct rdma_dev_addr *dev_addr;
4204        struct cma_ndev_work *work;
4205
4206        dev_addr = &id_priv->id.route.addr.dev_addr;
4207
4208        if ((dev_addr->bound_dev_if == ndev->ifindex) &&
4209            (net_eq(dev_net(ndev), dev_addr->net)) &&
4210            memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
4211                pr_info("RDMA CM addr change for ndev %s used by id %p\n",
4212                        ndev->name, &id_priv->id);
4213                work = kzalloc(sizeof *work, GFP_KERNEL);
4214                if (!work)
4215                        return -ENOMEM;
4216
4217                INIT_WORK(&work->work, cma_ndev_work_handler);
4218                work->id = id_priv;
4219                work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
4220                atomic_inc(&id_priv->refcount);
4221                queue_work(cma_wq, &work->work);
4222        }
4223
4224        return 0;
4225}
4226
4227static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
4228                               void *ptr)
4229{
4230        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
4231        struct cma_device *cma_dev;
4232        struct rdma_id_private *id_priv;
4233        int ret = NOTIFY_DONE;
4234
4235        if (event != NETDEV_BONDING_FAILOVER)
4236                return NOTIFY_DONE;
4237
4238        if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
4239                return NOTIFY_DONE;
4240
4241        mutex_lock(&lock);
4242        list_for_each_entry(cma_dev, &dev_list, list)
4243                list_for_each_entry(id_priv, &cma_dev->id_list, list) {
4244                        ret = cma_netdev_change(ndev, id_priv);
4245                        if (ret)
4246                                goto out;
4247                }
4248
4249out:
4250        mutex_unlock(&lock);
4251        return ret;
4252}
4253
4254static struct notifier_block cma_nb = {
4255        .notifier_call = cma_netdev_callback
4256};
4257
4258static void cma_add_one(struct ib_device *device)
4259{
4260        struct cma_device *cma_dev;
4261        struct rdma_id_private *id_priv;
4262        unsigned int i;
4263        unsigned long supported_gids = 0;
4264
4265        cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
4266        if (!cma_dev)
4267                return;
4268
4269        cma_dev->device = device;
4270        cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
4271                                            sizeof(*cma_dev->default_gid_type),
4272                                            GFP_KERNEL);
4273        if (!cma_dev->default_gid_type)
4274                goto free_cma_dev;
4275
4276        cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
4277                                            sizeof(*cma_dev->default_roce_tos),
4278                                            GFP_KERNEL);
4279        if (!cma_dev->default_roce_tos)
4280                goto free_gid_type;
4281
4282        for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
4283                supported_gids = roce_gid_type_mask_support(device, i);
4284                WARN_ON(!supported_gids);
4285                if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
4286                        cma_dev->default_gid_type[i - rdma_start_port(device)] =
4287                                CMA_PREFERRED_ROCE_GID_TYPE;
4288                else
4289                        cma_dev->default_gid_type[i - rdma_start_port(device)] =
4290                                find_first_bit(&supported_gids, BITS_PER_LONG);
4291                cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
4292        }
4293
4294        init_completion(&cma_dev->comp);
4295        atomic_set(&cma_dev->refcount, 1);
4296        INIT_LIST_HEAD(&cma_dev->id_list);
4297        ib_set_client_data(device, &cma_client, cma_dev);
4298
4299        mutex_lock(&lock);
4300        list_add_tail(&cma_dev->list, &dev_list);
4301        list_for_each_entry(id_priv, &listen_any_list, list)
4302                cma_listen_on_dev(id_priv, cma_dev);
4303        mutex_unlock(&lock);
4304
4305        return;
4306
4307free_gid_type:
4308        kfree(cma_dev->default_gid_type);
4309
4310free_cma_dev:
4311        kfree(cma_dev);
4312
4313        return;
4314}
4315
4316static int cma_remove_id_dev(struct rdma_id_private *id_priv)
4317{
4318        struct rdma_cm_event event;
4319        enum rdma_cm_state state;
4320        int ret = 0;
4321
4322        /* Record that we want to remove the device */
4323        state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
4324        if (state == RDMA_CM_DESTROYING)
4325                return 0;
4326
4327        cma_cancel_operation(id_priv, state);
4328        mutex_lock(&id_priv->handler_mutex);
4329
4330        /* Check for destruction from another callback. */
4331        if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
4332                goto out;
4333
4334        memset(&event, 0, sizeof event);
4335        event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
4336        ret = id_priv->id.event_handler(&id_priv->id, &event);
4337out:
4338        mutex_unlock(&id_priv->handler_mutex);
4339        return ret;
4340}
4341
4342static void cma_process_remove(struct cma_device *cma_dev)
4343{
4344        struct rdma_id_private *id_priv;
4345        int ret;
4346
4347        mutex_lock(&lock);
4348        while (!list_empty(&cma_dev->id_list)) {
4349                id_priv = list_entry(cma_dev->id_list.next,
4350                                     struct rdma_id_private, list);
4351
4352                list_del(&id_priv->listen_list);
4353                list_del_init(&id_priv->list);
4354                atomic_inc(&id_priv->refcount);
4355                mutex_unlock(&lock);
4356
4357                ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
4358                cma_deref_id(id_priv);
4359                if (ret)
4360                        rdma_destroy_id(&id_priv->id);
4361
4362                mutex_lock(&lock);
4363        }
4364        mutex_unlock(&lock);
4365
4366        cma_deref_dev(cma_dev);
4367        wait_for_completion(&cma_dev->comp);
4368}
4369
4370static void cma_remove_one(struct ib_device *device, void *client_data)
4371{
4372        struct cma_device *cma_dev = client_data;
4373
4374        if (!cma_dev)
4375                return;
4376
4377        mutex_lock(&lock);
4378        list_del(&cma_dev->list);
4379        mutex_unlock(&lock);
4380
4381        cma_process_remove(cma_dev);
4382        kfree(cma_dev->default_roce_tos);
4383        kfree(cma_dev->default_gid_type);
4384        kfree(cma_dev);
4385}
4386
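    /*
     * Netlink dump callback for RDMA_NL_RDMA_CM_ID_STATS: walk every
     * device's ID list and emit one NLM_F_MULTI message per CM ID.
     */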
4387static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
4388{
4389        struct nlmsghdr *nlh;
4390        struct rdma_cm_id_stats *id_stats;
4391        struct rdma_id_private *id_priv;
4392        struct rdma_cm_id *id = NULL;
4393        struct cma_device *cma_dev;
4394        int i_dev = 0, i_id = 0;
4395
4396        /*
4397         * We export all of the IDs as a sequence of messages.  Each
4398         * ID gets its own netlink message.
4399         */
4400        mutex_lock(&lock);
4401
4402        list_for_each_entry(cma_dev, &dev_list, list) {
4403                if (i_dev < cb->args[0]) {
4404                        i_dev++;
4405                        continue;
4406                }
4407
4408                i_id = 0;
4409                list_for_each_entry(id_priv, &cma_dev->id_list, list) {
4410                        if (i_id < cb->args[1]) {
4411                                i_id++;
4412                                continue;
4413                        }
4414
4415                        id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
4416                                                sizeof *id_stats, RDMA_NL_RDMA_CM,
4417                                                RDMA_NL_RDMA_CM_ID_STATS,
4418                                                NLM_F_MULTI);
4419                        if (!id_stats)
4420                                goto out;
4421
4422                        memset(id_stats, 0, sizeof *id_stats);
4423                        id = &id_priv->id;
4424                        id_stats->node_type = id->route.addr.dev_addr.dev_type;
4425                        id_stats->port_num = id->port_num;
4426                        id_stats->bound_dev_if =
4427                                id->route.addr.dev_addr.bound_dev_if;
4428
4429                        if (ibnl_put_attr(skb, nlh,
4430                                          rdma_addr_size(cma_src_addr(id_priv)),
4431                                          cma_src_addr(id_priv),
4432                                          RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
4433                                goto out;
4434                        if (ibnl_put_attr(skb, nlh,
4435                                          rdma_addr_size(cma_dst_addr(id_priv)),
4436                                          cma_dst_addr(id_priv),
4437                                          RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
4438                                goto out;
4439
4440                        id_stats->pid           = id_priv->owner;
4441                        id_stats->port_space    = id->ps;
4442                        id_stats->cm_state      = id_priv->state;
4443                        id_stats->qp_num        = id_priv->qp_num;
4444                        id_stats->qp_type       = id->qp_type;
4445
4446                        i_id++;
4447                }
4448
4449                cb->args[1] = 0;
4450                i_dev++;
4451        }
4452
4453out:
4454        mutex_unlock(&lock);
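            /* Remember the device/ID cursor so the next dump call resumes here. */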
4455        cb->args[0] = i_dev;
4456        cb->args[1] = i_id;
4457
4458        return skb->len;
4459}
4460
4461static const struct rdma_nl_cbs cma_cb_table[] = {
4462        [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
4463};
4464
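    /*
     * Per-network-namespace setup: each namespace gets its own IDR for
     * every RDMA port space (TCP, UDP, IPoIB and IB).
     */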
4465static int cma_init_net(struct net *net)
4466{
4467        struct cma_pernet *pernet = cma_pernet(net);
4468
4469        idr_init(&pernet->tcp_ps);
4470        idr_init(&pernet->udp_ps);
4471        idr_init(&pernet->ipoib_ps);
4472        idr_init(&pernet->ib_ps);
4473
4474        return 0;
4475}
4476
4477static void cma_exit_net(struct net *net)
4478{
4479        struct cma_pernet *pernet = cma_pernet(net);
4480
4481        idr_destroy(&pernet->tcp_ps);
4482        idr_destroy(&pernet->udp_ps);
4483        idr_destroy(&pernet->ipoib_ps);
4484        idr_destroy(&pernet->ib_ps);
4485}
4486
4487static struct pernet_operations cma_pernet_operations = {
4488        .init = cma_init_net,
4489        .exit = cma_exit_net,
4490        .id = &cma_pernet_id,
4491        .size = sizeof(struct cma_pernet),
4492};
4493
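    /*
     * Module init: create the ordered rdma_cm workqueue, then register the
     * per-net state, the SA and address-resolution clients, the netdevice
     * notifier, the IB client, the netlink callbacks and configfs.
     */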
4494static int __init cma_init(void)
4495{
4496        int ret;
4497
4498        cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
4499        if (!cma_wq)
4500                return -ENOMEM;
4501
4502        ret = register_pernet_subsys(&cma_pernet_operations);
4503        if (ret)
4504                goto err_wq;
4505
4506        ib_sa_register_client(&sa_client);
4507        rdma_addr_register_client(&addr_client);
4508        register_netdevice_notifier(&cma_nb);
4509
4510        ret = ib_register_client(&cma_client);
4511        if (ret)
4512                goto err;
4513
4514        rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
4515        cma_configfs_init();
4516
4517        return 0;
4518
4519err:
4520        unregister_netdevice_notifier(&cma_nb);
4521        rdma_addr_unregister_client(&addr_client);
4522        ib_sa_unregister_client(&sa_client);
            unregister_pernet_subsys(&cma_pernet_operations);
4523err_wq:
4524        destroy_workqueue(cma_wq);
4525        return ret;
4526}
4527
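    /* Module exit: undo the cma_init() registrations in reverse order. */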
4528static void __exit cma_cleanup(void)
4529{
4530        cma_configfs_exit();
4531        rdma_nl_unregister(RDMA_NL_RDMA_CM);
4532        ib_unregister_client(&cma_client);
4533        unregister_netdevice_notifier(&cma_nb);
4534        rdma_addr_unregister_client(&addr_client);
4535        ib_sa_unregister_client(&sa_client);
4536        unregister_pernet_subsys(&cma_pernet_operations);
4537        destroy_workqueue(cma_wq);
4538}
4539
4540MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1);
4541
4542module_init(cma_init);
4543module_exit(cma_cleanup);
4544