linux/drivers/infiniband/core/cma.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
   3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
   4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
   5 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
   6 *
   7 * This software is available to you under a choice of one of two
   8 * licenses.  You may choose to be licensed under the terms of the GNU
   9 * General Public License (GPL) Version 2, available from the file
  10 * COPYING in the main directory of this source tree, or the
  11 * OpenIB.org BSD license below:
  12 *
  13 *     Redistribution and use in source and binary forms, with or
  14 *     without modification, are permitted provided that the following
  15 *     conditions are met:
  16 *
  17 *      - Redistributions of source code must retain the above
  18 *        copyright notice, this list of conditions and the following
  19 *        disclaimer.
  20 *
  21 *      - Redistributions in binary form must reproduce the above
  22 *        copyright notice, this list of conditions and the following
  23 *        disclaimer in the documentation and/or other materials
  24 *        provided with the distribution.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33 * SOFTWARE.
  34 */
  35
  36#include <linux/completion.h>
  37#include <linux/in.h>
  38#include <linux/in6.h>
  39#include <linux/mutex.h>
  40#include <linux/random.h>
  41#include <linux/igmp.h>
  42#include <linux/xarray.h>
  43#include <linux/inetdevice.h>
  44#include <linux/slab.h>
  45#include <linux/module.h>
  46#include <net/route.h>
  47
  48#include <net/net_namespace.h>
  49#include <net/netns/generic.h>
  50#include <net/tcp.h>
  51#include <net/ipv6.h>
  52#include <net/ip_fib.h>
  53#include <net/ip6_route.h>
  54
  55#include <rdma/rdma_cm.h>
  56#include <rdma/rdma_cm_ib.h>
  57#include <rdma/rdma_netlink.h>
  58#include <rdma/ib.h>
  59#include <rdma/ib_cache.h>
  60#include <rdma/ib_cm.h>
  61#include <rdma/ib_sa.h>
  62#include <rdma/iw_cm.h>
  63
  64#include "core_priv.h"
  65#include "cma_priv.h"
  66
  67MODULE_AUTHOR("Sean Hefty");
  68MODULE_DESCRIPTION("Generic RDMA CM Agent");
  69MODULE_LICENSE("Dual BSD/GPL");
  70
  71#define CMA_CM_RESPONSE_TIMEOUT 20
  72#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
  73#define CMA_MAX_CM_RETRIES 15
  74#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
  75#define CMA_IBOE_PACKET_LIFETIME 18
  76#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
  77
  78static const char * const cma_events[] = {
  79        [RDMA_CM_EVENT_ADDR_RESOLVED]    = "address resolved",
  80        [RDMA_CM_EVENT_ADDR_ERROR]       = "address error",
  81        [RDMA_CM_EVENT_ROUTE_RESOLVED]   = "route resolved ",
  82        [RDMA_CM_EVENT_ROUTE_ERROR]      = "route error",
  83        [RDMA_CM_EVENT_CONNECT_REQUEST]  = "connect request",
  84        [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
  85        [RDMA_CM_EVENT_CONNECT_ERROR]    = "connect error",
  86        [RDMA_CM_EVENT_UNREACHABLE]      = "unreachable",
  87        [RDMA_CM_EVENT_REJECTED]         = "rejected",
  88        [RDMA_CM_EVENT_ESTABLISHED]      = "established",
  89        [RDMA_CM_EVENT_DISCONNECTED]     = "disconnected",
  90        [RDMA_CM_EVENT_DEVICE_REMOVAL]   = "device removal",
  91        [RDMA_CM_EVENT_MULTICAST_JOIN]   = "multicast join",
  92        [RDMA_CM_EVENT_MULTICAST_ERROR]  = "multicast error",
  93        [RDMA_CM_EVENT_ADDR_CHANGE]      = "address change",
  94        [RDMA_CM_EVENT_TIMEWAIT_EXIT]    = "timewait exit",
  95};
  96
  97const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
  98{
  99        size_t index = event;
 100
 101        return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
 102                        cma_events[index] : "unrecognized event";
 103}
 104EXPORT_SYMBOL(rdma_event_msg);
 105
 106const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
 107                                                int reason)
 108{
 109        if (rdma_ib_or_roce(id->device, id->port_num))
 110                return ibcm_reject_msg(reason);
 111
 112        if (rdma_protocol_iwarp(id->device, id->port_num))
 113                return iwcm_reject_msg(reason);
 114
 115        WARN_ON_ONCE(1);
 116        return "unrecognized transport";
 117}
 118EXPORT_SYMBOL(rdma_reject_msg);
 119
 120bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
 121{
 122        if (rdma_ib_or_roce(id->device, id->port_num))
 123                return reason == IB_CM_REJ_CONSUMER_DEFINED;
 124
 125        if (rdma_protocol_iwarp(id->device, id->port_num))
 126                return reason == -ECONNREFUSED;
 127
 128        WARN_ON_ONCE(1);
 129        return false;
 130}
 131EXPORT_SYMBOL(rdma_is_consumer_reject);
 132
 133const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
 134                                      struct rdma_cm_event *ev, u8 *data_len)
 135{
 136        const void *p;
 137
 138        if (rdma_is_consumer_reject(id, ev->status)) {
 139                *data_len = ev->param.conn.private_data_len;
 140                p = ev->param.conn.private_data;
 141        } else {
 142                *data_len = 0;
 143                p = NULL;
 144        }
 145        return p;
 146}
 147EXPORT_SYMBOL(rdma_consumer_reject_data);
 148
 149/**
 150 * rdma_iw_cm_id() - return the iw_cm_id pointer for this cm_id.
 151 * @id: Communication Identifier
 152 */
 153struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id)
 154{
 155        struct rdma_id_private *id_priv;
 156
 157        id_priv = container_of(id, struct rdma_id_private, id);
 158        if (id->device->node_type == RDMA_NODE_RNIC)
 159                return id_priv->cm_id.iw;
 160        return NULL;
 161}
 162EXPORT_SYMBOL(rdma_iw_cm_id);
 163
 164/**
 165 * rdma_res_to_id() - return the rdma_cm_id pointer for this restrack.
 166 * @res: rdma resource tracking entry pointer
 167 */
 168struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
 169{
 170        struct rdma_id_private *id_priv =
 171                container_of(res, struct rdma_id_private, res);
 172
 173        return &id_priv->id;
 174}
 175EXPORT_SYMBOL(rdma_res_to_id);
 176
 177static void cma_add_one(struct ib_device *device);
 178static void cma_remove_one(struct ib_device *device, void *client_data);
 179
 180static struct ib_client cma_client = {
 181        .name   = "cma",
 182        .add    = cma_add_one,
 183        .remove = cma_remove_one
 184};
 185
 186static struct ib_sa_client sa_client;
 187static LIST_HEAD(dev_list);
 188static LIST_HEAD(listen_any_list);
 189static DEFINE_MUTEX(lock);
 190static struct workqueue_struct *cma_wq;
 191static unsigned int cma_pernet_id;
 192
 193struct cma_pernet {
 194        struct xarray tcp_ps;
 195        struct xarray udp_ps;
 196        struct xarray ipoib_ps;
 197        struct xarray ib_ps;
 198};
 199
 200static struct cma_pernet *cma_pernet(struct net *net)
 201{
 202        return net_generic(net, cma_pernet_id);
 203}
 204
 205static
 206struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
 207{
 208        struct cma_pernet *pernet = cma_pernet(net);
 209
 210        switch (ps) {
 211        case RDMA_PS_TCP:
 212                return &pernet->tcp_ps;
 213        case RDMA_PS_UDP:
 214                return &pernet->udp_ps;
 215        case RDMA_PS_IPOIB:
 216                return &pernet->ipoib_ps;
 217        case RDMA_PS_IB:
 218                return &pernet->ib_ps;
 219        default:
 220                return NULL;
 221        }
 222}
 223
 224struct cma_device {
 225        struct list_head        list;
 226        struct ib_device        *device;
 227        struct completion       comp;
 228        atomic_t                refcount;
 229        struct list_head        id_list;
 230        enum ib_gid_type        *default_gid_type;
 231        u8                      *default_roce_tos;
 232};
 233
 234struct rdma_bind_list {
 235        enum rdma_ucm_port_space ps;
 236        struct hlist_head       owners;
 237        unsigned short          port;
 238};
 239
 240struct class_port_info_context {
 241        struct ib_class_port_info       *class_port_info;
 242        struct ib_device                *device;
 243        struct completion               done;
 244        struct ib_sa_query              *sa_query;
 245        u8                              port_num;
 246};
 247
 248static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
 249                        struct rdma_bind_list *bind_list, int snum)
 250{
 251        struct xarray *xa = cma_pernet_xa(net, ps);
 252
 253        return xa_insert(xa, snum, bind_list, GFP_KERNEL);
 254}
 255
 256static struct rdma_bind_list *cma_ps_find(struct net *net,
 257                                          enum rdma_ucm_port_space ps, int snum)
 258{
 259        struct xarray *xa = cma_pernet_xa(net, ps);
 260
 261        return xa_load(xa, snum);
 262}
 263
 264static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
 265                          int snum)
 266{
 267        struct xarray *xa = cma_pernet_xa(net, ps);
 268
 269        xa_erase(xa, snum);
 270}
 271
 272enum {
 273        CMA_OPTION_AFONLY,
 274};
 275
 276void cma_ref_dev(struct cma_device *cma_dev)
 277{
 278        atomic_inc(&cma_dev->refcount);
 279}
 280
 281struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter  filter,
 282                                             void               *cookie)
 283{
 284        struct cma_device *cma_dev;
 285        struct cma_device *found_cma_dev = NULL;
 286
 287        mutex_lock(&lock);
 288
 289        list_for_each_entry(cma_dev, &dev_list, list)
 290                if (filter(cma_dev->device, cookie)) {
 291                        found_cma_dev = cma_dev;
 292                        break;
 293                }
 294
 295        if (found_cma_dev)
 296                cma_ref_dev(found_cma_dev);
 297        mutex_unlock(&lock);
 298        return found_cma_dev;
 299}
 300
 301int cma_get_default_gid_type(struct cma_device *cma_dev,
 302                             unsigned int port)
 303{
 304        if (!rdma_is_port_valid(cma_dev->device, port))
 305                return -EINVAL;
 306
 307        return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
 308}
 309
 310int cma_set_default_gid_type(struct cma_device *cma_dev,
 311                             unsigned int port,
 312                             enum ib_gid_type default_gid_type)
 313{
 314        unsigned long supported_gids;
 315
 316        if (!rdma_is_port_valid(cma_dev->device, port))
 317                return -EINVAL;
 318
 319        supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
 320
 321        if (!(supported_gids & 1 << default_gid_type))
 322                return -EINVAL;
 323
 324        cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
 325                default_gid_type;
 326
 327        return 0;
 328}
 329
 330int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
 331{
 332        if (!rdma_is_port_valid(cma_dev->device, port))
 333                return -EINVAL;
 334
 335        return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
 336}
 337
 338int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
 339                             u8 default_roce_tos)
 340{
 341        if (!rdma_is_port_valid(cma_dev->device, port))
 342                return -EINVAL;
 343
 344        cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
 345                 default_roce_tos;
 346
 347        return 0;
 348}
 349struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
 350{
 351        return cma_dev->device;
 352}
 353
 354/*
 355 * Device removal can occur at anytime, so we need extra handling to
 356 * serialize notifying the user of device removal with other callbacks.
 357 * We do this by disabling removal notification while a callback is in process,
 358 * and reporting it after the callback completes.
 359 */
 360
 361struct cma_multicast {
 362        struct rdma_id_private *id_priv;
 363        union {
 364                struct ib_sa_multicast *ib;
 365        } multicast;
 366        struct list_head        list;
 367        void                    *context;
 368        struct sockaddr_storage addr;
 369        struct kref             mcref;
 370        u8                      join_state;
 371};
 372
 373struct cma_work {
 374        struct work_struct      work;
 375        struct rdma_id_private  *id;
 376        enum rdma_cm_state      old_state;
 377        enum rdma_cm_state      new_state;
 378        struct rdma_cm_event    event;
 379};
 380
 381struct cma_ndev_work {
 382        struct work_struct      work;
 383        struct rdma_id_private  *id;
 384        struct rdma_cm_event    event;
 385};
 386
 387struct iboe_mcast_work {
 388        struct work_struct       work;
 389        struct rdma_id_private  *id;
 390        struct cma_multicast    *mc;
 391};
 392
 393union cma_ip_addr {
 394        struct in6_addr ip6;
 395        struct {
 396                __be32 pad[3];
 397                __be32 addr;
 398        } ip4;
 399};
 400
 401struct cma_hdr {
 402        u8 cma_version;
 403        u8 ip_version;  /* IP version: 7:4 */
 404        __be16 port;
 405        union cma_ip_addr src_addr;
 406        union cma_ip_addr dst_addr;
 407};
 408
 409#define CMA_VERSION 0x00
 410
 411struct cma_req_info {
 412        struct sockaddr_storage listen_addr_storage;
 413        struct sockaddr_storage src_addr_storage;
 414        struct ib_device *device;
 415        union ib_gid local_gid;
 416        __be64 service_id;
 417        int port;
 418        bool has_gid;
 419        u16 pkey;
 420};
 421
 422static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
 423{
 424        unsigned long flags;
 425        int ret;
 426
 427        spin_lock_irqsave(&id_priv->lock, flags);
 428        ret = (id_priv->state == comp);
 429        spin_unlock_irqrestore(&id_priv->lock, flags);
 430        return ret;
 431}
 432
 433static int cma_comp_exch(struct rdma_id_private *id_priv,
 434                         enum rdma_cm_state comp, enum rdma_cm_state exch)
 435{
 436        unsigned long flags;
 437        int ret;
 438
 439        spin_lock_irqsave(&id_priv->lock, flags);
 440        if ((ret = (id_priv->state == comp)))
 441                id_priv->state = exch;
 442        spin_unlock_irqrestore(&id_priv->lock, flags);
 443        return ret;
 444}
 445
 446static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
 447                                   enum rdma_cm_state exch)
 448{
 449        unsigned long flags;
 450        enum rdma_cm_state old;
 451
 452        spin_lock_irqsave(&id_priv->lock, flags);
 453        old = id_priv->state;
 454        id_priv->state = exch;
 455        spin_unlock_irqrestore(&id_priv->lock, flags);
 456        return old;
 457}
 458
 459static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
 460{
 461        return hdr->ip_version >> 4;
 462}
 463
 464static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
 465{
 466        hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 467}
 468
 469static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
 470{
 471        struct in_device *in_dev = NULL;
 472
 473        if (ndev) {
 474                rtnl_lock();
 475                in_dev = __in_dev_get_rtnl(ndev);
 476                if (in_dev) {
 477                        if (join)
 478                                ip_mc_inc_group(in_dev,
 479                                                *(__be32 *)(mgid->raw + 12));
 480                        else
 481                                ip_mc_dec_group(in_dev,
 482                                                *(__be32 *)(mgid->raw + 12));
 483                }
 484                rtnl_unlock();
 485        }
 486        return (in_dev) ? 0 : -ENODEV;
 487}
 488
 489static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
 490                               struct cma_device *cma_dev)
 491{
 492        cma_ref_dev(cma_dev);
 493        id_priv->cma_dev = cma_dev;
 494        id_priv->id.device = cma_dev->device;
 495        id_priv->id.route.addr.dev_addr.transport =
 496                rdma_node_get_transport(cma_dev->device->node_type);
 497        list_add_tail(&id_priv->list, &cma_dev->id_list);
 498        if (id_priv->res.kern_name)
 499                rdma_restrack_kadd(&id_priv->res);
 500        else
 501                rdma_restrack_uadd(&id_priv->res);
 502}
 503
 504static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 505                              struct cma_device *cma_dev)
 506{
 507        _cma_attach_to_dev(id_priv, cma_dev);
 508        id_priv->gid_type =
 509                cma_dev->default_gid_type[id_priv->id.port_num -
 510                                          rdma_start_port(cma_dev->device)];
 511}
 512
 513void cma_deref_dev(struct cma_device *cma_dev)
 514{
 515        if (atomic_dec_and_test(&cma_dev->refcount))
 516                complete(&cma_dev->comp);
 517}
 518
 519static inline void release_mc(struct kref *kref)
 520{
 521        struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
 522
 523        kfree(mc->multicast.ib);
 524        kfree(mc);
 525}
 526
 527static void cma_release_dev(struct rdma_id_private *id_priv)
 528{
 529        mutex_lock(&lock);
 530        list_del(&id_priv->list);
 531        cma_deref_dev(id_priv->cma_dev);
 532        id_priv->cma_dev = NULL;
 533        mutex_unlock(&lock);
 534}
 535
 536static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
 537{
 538        return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
 539}
 540
 541static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
 542{
 543        return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
 544}
 545
 546static inline unsigned short cma_family(struct rdma_id_private *id_priv)
 547{
 548        return id_priv->id.route.addr.src_addr.ss_family;
 549}
 550
 551static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
 552{
 553        struct ib_sa_mcmember_rec rec;
 554        int ret = 0;
 555
 556        if (id_priv->qkey) {
 557                if (qkey && id_priv->qkey != qkey)
 558                        return -EINVAL;
 559                return 0;
 560        }
 561
 562        if (qkey) {
 563                id_priv->qkey = qkey;
 564                return 0;
 565        }
 566
 567        switch (id_priv->id.ps) {
 568        case RDMA_PS_UDP:
 569        case RDMA_PS_IB:
 570                id_priv->qkey = RDMA_UDP_QKEY;
 571                break;
 572        case RDMA_PS_IPOIB:
 573                ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
 574                ret = ib_sa_get_mcmember_rec(id_priv->id.device,
 575                                             id_priv->id.port_num, &rec.mgid,
 576                                             &rec);
 577                if (!ret)
 578                        id_priv->qkey = be32_to_cpu(rec.qkey);
 579                break;
 580        default:
 581                break;
 582        }
 583        return ret;
 584}
 585
 586static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
 587{
 588        dev_addr->dev_type = ARPHRD_INFINIBAND;
 589        rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
 590        ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
 591}
 592
 593static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 594{
 595        int ret;
 596
 597        if (addr->sa_family != AF_IB) {
 598                ret = rdma_translate_ip(addr, dev_addr);
 599        } else {
 600                cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
 601                ret = 0;
 602        }
 603
 604        return ret;
 605}
 606
 607static const struct ib_gid_attr *
 608cma_validate_port(struct ib_device *device, u8 port,
 609                  enum ib_gid_type gid_type,
 610                  union ib_gid *gid,
 611                  struct rdma_id_private *id_priv)
 612{
 613        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 614        int bound_if_index = dev_addr->bound_dev_if;
 615        const struct ib_gid_attr *sgid_attr;
 616        int dev_type = dev_addr->dev_type;
 617        struct net_device *ndev = NULL;
 618
 619        if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net))
 620                return ERR_PTR(-ENODEV);
 621
 622        if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
 623                return ERR_PTR(-ENODEV);
 624
 625        if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
 626                return ERR_PTR(-ENODEV);
 627
 628        if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
 629                ndev = dev_get_by_index(dev_addr->net, bound_if_index);
 630                if (!ndev)
 631                        return ERR_PTR(-ENODEV);
 632        } else {
 633                gid_type = IB_GID_TYPE_IB;
 634        }
 635
 636        sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
 637        if (ndev)
 638                dev_put(ndev);
 639        return sgid_attr;
 640}
 641
 642static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
 643                               const struct ib_gid_attr *sgid_attr)
 644{
 645        WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr);
 646        id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
 647}
 648
 649/**
 650 * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
 651 * based on source ip address.
 652 * @id_priv:    cm_id which should be bound to cma device
 653 *
 654 * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
 655 * based on source IP address. It returns 0 on success or error code otherwise.
 656 * It is applicable to active and passive side cm_id.
 657 */
 658static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
 659{
 660        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 661        const struct ib_gid_attr *sgid_attr;
 662        union ib_gid gid, iboe_gid, *gidp;
 663        struct cma_device *cma_dev;
 664        enum ib_gid_type gid_type;
 665        int ret = -ENODEV;
 666        unsigned int port;
 667
 668        if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
 669            id_priv->id.ps == RDMA_PS_IPOIB)
 670                return -EINVAL;
 671
 672        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
 673                    &iboe_gid);
 674
 675        memcpy(&gid, dev_addr->src_dev_addr +
 676               rdma_addr_gid_offset(dev_addr), sizeof(gid));
 677
 678        mutex_lock(&lock);
 679        list_for_each_entry(cma_dev, &dev_list, list) {
 680                rdma_for_each_port (cma_dev->device, port) {
 681                        gidp = rdma_protocol_roce(cma_dev->device, port) ?
 682                               &iboe_gid : &gid;
 683                        gid_type = cma_dev->default_gid_type[port - 1];
 684                        sgid_attr = cma_validate_port(cma_dev->device, port,
 685                                                      gid_type, gidp, id_priv);
 686                        if (!IS_ERR(sgid_attr)) {
 687                                id_priv->id.port_num = port;
 688                                cma_bind_sgid_attr(id_priv, sgid_attr);
 689                                cma_attach_to_dev(id_priv, cma_dev);
 690                                ret = 0;
 691                                goto out;
 692                        }
 693                }
 694        }
 695out:
 696        mutex_unlock(&lock);
 697        return ret;
 698}
 699
 700/**
 701 * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
 702 * @id_priv:            cm id to bind to cma device
 703 * @listen_id_priv:     listener cm id to match against
 704 * @req:                Pointer to req structure containaining incoming
 705 *                      request information
 706 * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
 707 * rdma device matches for listen_id and incoming request. It also verifies
 708 * that a GID table entry is present for the source address.
 709 * Returns 0 on success, or returns error code otherwise.
 710 */
 711static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
 712                              const struct rdma_id_private *listen_id_priv,
 713                              struct cma_req_info *req)
 714{
 715        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 716        const struct ib_gid_attr *sgid_attr;
 717        enum ib_gid_type gid_type;
 718        union ib_gid gid;
 719
 720        if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
 721            id_priv->id.ps == RDMA_PS_IPOIB)
 722                return -EINVAL;
 723
 724        if (rdma_protocol_roce(req->device, req->port))
 725                rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
 726                            &gid);
 727        else
 728                memcpy(&gid, dev_addr->src_dev_addr +
 729                       rdma_addr_gid_offset(dev_addr), sizeof(gid));
 730
 731        gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
 732        sgid_attr = cma_validate_port(req->device, req->port,
 733                                      gid_type, &gid, id_priv);
 734        if (IS_ERR(sgid_attr))
 735                return PTR_ERR(sgid_attr);
 736
 737        id_priv->id.port_num = req->port;
 738        cma_bind_sgid_attr(id_priv, sgid_attr);
 739        /* Need to acquire lock to protect against reader
 740         * of cma_dev->id_list such as cma_netdev_callback() and
 741         * cma_process_remove().
 742         */
 743        mutex_lock(&lock);
 744        cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
 745        mutex_unlock(&lock);
 746        return 0;
 747}
 748
 749static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
 750                              const struct rdma_id_private *listen_id_priv)
 751{
 752        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 753        const struct ib_gid_attr *sgid_attr;
 754        struct cma_device *cma_dev;
 755        enum ib_gid_type gid_type;
 756        int ret = -ENODEV;
 757        union ib_gid gid;
 758        u8 port;
 759
 760        if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
 761            id_priv->id.ps == RDMA_PS_IPOIB)
 762                return -EINVAL;
 763
 764        memcpy(&gid, dev_addr->src_dev_addr +
 765               rdma_addr_gid_offset(dev_addr), sizeof(gid));
 766
 767        mutex_lock(&lock);
 768
 769        cma_dev = listen_id_priv->cma_dev;
 770        port = listen_id_priv->id.port_num;
 771        gid_type = listen_id_priv->gid_type;
 772        sgid_attr = cma_validate_port(cma_dev->device, port,
 773                                      gid_type, &gid, id_priv);
 774        if (!IS_ERR(sgid_attr)) {
 775                id_priv->id.port_num = port;
 776                cma_bind_sgid_attr(id_priv, sgid_attr);
 777                ret = 0;
 778                goto out;
 779        }
 780
 781        list_for_each_entry(cma_dev, &dev_list, list) {
 782                for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
 783                        if (listen_id_priv->cma_dev == cma_dev &&
 784                            listen_id_priv->id.port_num == port)
 785                                continue;
 786
 787                        gid_type = cma_dev->default_gid_type[port - 1];
 788                        sgid_attr = cma_validate_port(cma_dev->device, port,
 789                                                      gid_type, &gid, id_priv);
 790                        if (!IS_ERR(sgid_attr)) {
 791                                id_priv->id.port_num = port;
 792                                cma_bind_sgid_attr(id_priv, sgid_attr);
 793                                ret = 0;
 794                                goto out;
 795                        }
 796                }
 797        }
 798
 799out:
 800        if (!ret)
 801                cma_attach_to_dev(id_priv, cma_dev);
 802
 803        mutex_unlock(&lock);
 804        return ret;
 805}
 806
 807/*
 808 * Select the source IB device and address to reach the destination IB address.
 809 */
 810static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
 811{
 812        struct cma_device *cma_dev, *cur_dev;
 813        struct sockaddr_ib *addr;
 814        union ib_gid gid, sgid, *dgid;
 815        u16 pkey, index;
 816        u8 p;
 817        enum ib_port_state port_state;
 818        int i;
 819
 820        cma_dev = NULL;
 821        addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
 822        dgid = (union ib_gid *) &addr->sib_addr;
 823        pkey = ntohs(addr->sib_pkey);
 824
 825        mutex_lock(&lock);
 826        list_for_each_entry(cur_dev, &dev_list, list) {
 827                for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
 828                        if (!rdma_cap_af_ib(cur_dev->device, p))
 829                                continue;
 830
 831                        if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
 832                                continue;
 833
 834                        if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
 835                                continue;
 836                        for (i = 0; !rdma_query_gid(cur_dev->device,
 837                                                    p, i, &gid);
 838                             i++) {
 839                                if (!memcmp(&gid, dgid, sizeof(gid))) {
 840                                        cma_dev = cur_dev;
 841                                        sgid = gid;
 842                                        id_priv->id.port_num = p;
 843                                        goto found;
 844                                }
 845
 846                                if (!cma_dev && (gid.global.subnet_prefix ==
 847                                    dgid->global.subnet_prefix) &&
 848                                    port_state == IB_PORT_ACTIVE) {
 849                                        cma_dev = cur_dev;
 850                                        sgid = gid;
 851                                        id_priv->id.port_num = p;
 852                                        goto found;
 853                                }
 854                        }
 855                }
 856        }
 857        mutex_unlock(&lock);
 858        return -ENODEV;
 859
 860found:
 861        cma_attach_to_dev(id_priv, cma_dev);
 862        mutex_unlock(&lock);
 863        addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
 864        memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
 865        cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
 866        return 0;
 867}
 868
 869static void cma_deref_id(struct rdma_id_private *id_priv)
 870{
 871        if (atomic_dec_and_test(&id_priv->refcount))
 872                complete(&id_priv->comp);
 873}
 874
 875struct rdma_cm_id *__rdma_create_id(struct net *net,
 876                                    rdma_cm_event_handler event_handler,
 877                                    void *context, enum rdma_ucm_port_space ps,
 878                                    enum ib_qp_type qp_type, const char *caller)
 879{
 880        struct rdma_id_private *id_priv;
 881
 882        id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
 883        if (!id_priv)
 884                return ERR_PTR(-ENOMEM);
 885
 886        rdma_restrack_set_task(&id_priv->res, caller);
 887        id_priv->res.type = RDMA_RESTRACK_CM_ID;
 888        id_priv->state = RDMA_CM_IDLE;
 889        id_priv->id.context = context;
 890        id_priv->id.event_handler = event_handler;
 891        id_priv->id.ps = ps;
 892        id_priv->id.qp_type = qp_type;
 893        id_priv->tos_set = false;
 894        id_priv->timeout_set = false;
 895        id_priv->gid_type = IB_GID_TYPE_IB;
 896        spin_lock_init(&id_priv->lock);
 897        mutex_init(&id_priv->qp_mutex);
 898        init_completion(&id_priv->comp);
 899        atomic_set(&id_priv->refcount, 1);
 900        mutex_init(&id_priv->handler_mutex);
 901        INIT_LIST_HEAD(&id_priv->listen_list);
 902        INIT_LIST_HEAD(&id_priv->mc_list);
 903        get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
 904        id_priv->id.route.addr.dev_addr.net = get_net(net);
 905        id_priv->seq_num &= 0x00ffffff;
 906
 907        return &id_priv->id;
 908}
 909EXPORT_SYMBOL(__rdma_create_id);
 910
 911static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 912{
 913        struct ib_qp_attr qp_attr;
 914        int qp_attr_mask, ret;
 915
 916        qp_attr.qp_state = IB_QPS_INIT;
 917        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 918        if (ret)
 919                return ret;
 920
 921        ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 922        if (ret)
 923                return ret;
 924
 925        qp_attr.qp_state = IB_QPS_RTR;
 926        ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
 927        if (ret)
 928                return ret;
 929
 930        qp_attr.qp_state = IB_QPS_RTS;
 931        qp_attr.sq_psn = 0;
 932        ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
 933
 934        return ret;
 935}
 936
 937static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 938{
 939        struct ib_qp_attr qp_attr;
 940        int qp_attr_mask, ret;
 941
 942        qp_attr.qp_state = IB_QPS_INIT;
 943        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 944        if (ret)
 945                return ret;
 946
 947        return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 948}
 949
 950int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 951                   struct ib_qp_init_attr *qp_init_attr)
 952{
 953        struct rdma_id_private *id_priv;
 954        struct ib_qp *qp;
 955        int ret;
 956
 957        id_priv = container_of(id, struct rdma_id_private, id);
 958        if (id->device != pd->device)
 959                return -EINVAL;
 960
 961        qp_init_attr->port_num = id->port_num;
 962        qp = ib_create_qp(pd, qp_init_attr);
 963        if (IS_ERR(qp))
 964                return PTR_ERR(qp);
 965
 966        if (id->qp_type == IB_QPT_UD)
 967                ret = cma_init_ud_qp(id_priv, qp);
 968        else
 969                ret = cma_init_conn_qp(id_priv, qp);
 970        if (ret)
 971                goto err;
 972
 973        id->qp = qp;
 974        id_priv->qp_num = qp->qp_num;
 975        id_priv->srq = (qp->srq != NULL);
 976        return 0;
 977err:
 978        ib_destroy_qp(qp);
 979        return ret;
 980}
 981EXPORT_SYMBOL(rdma_create_qp);
 982
 983void rdma_destroy_qp(struct rdma_cm_id *id)
 984{
 985        struct rdma_id_private *id_priv;
 986
 987        id_priv = container_of(id, struct rdma_id_private, id);
 988        mutex_lock(&id_priv->qp_mutex);
 989        ib_destroy_qp(id_priv->id.qp);
 990        id_priv->id.qp = NULL;
 991        mutex_unlock(&id_priv->qp_mutex);
 992}
 993EXPORT_SYMBOL(rdma_destroy_qp);
 994
 995static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
 996                             struct rdma_conn_param *conn_param)
 997{
 998        struct ib_qp_attr qp_attr;
 999        int qp_attr_mask, ret;
1000
1001        mutex_lock(&id_priv->qp_mutex);
1002        if (!id_priv->id.qp) {
1003                ret = 0;
1004                goto out;
1005        }
1006
1007        /* Need to update QP attributes from default values. */
1008        qp_attr.qp_state = IB_QPS_INIT;
1009        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
1010        if (ret)
1011                goto out;
1012
1013        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
1014        if (ret)
1015                goto out;
1016
1017        qp_attr.qp_state = IB_QPS_RTR;
1018        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
1019        if (ret)
1020                goto out;
1021
1022        BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
1023
1024        if (conn_param)
1025                qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
1026        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
1027out:
1028        mutex_unlock(&id_priv->qp_mutex);
1029        return ret;
1030}
1031
1032static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
1033                             struct rdma_conn_param *conn_param)
1034{
1035        struct ib_qp_attr qp_attr;
1036        int qp_attr_mask, ret;
1037
1038        mutex_lock(&id_priv->qp_mutex);
1039        if (!id_priv->id.qp) {
1040                ret = 0;
1041                goto out;
1042        }
1043
1044        qp_attr.qp_state = IB_QPS_RTS;
1045        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
1046        if (ret)
1047                goto out;
1048
1049        if (conn_param)
1050                qp_attr.max_rd_atomic = conn_param->initiator_depth;
1051        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
1052out:
1053        mutex_unlock(&id_priv->qp_mutex);
1054        return ret;
1055}
1056
1057static int cma_modify_qp_err(struct rdma_id_private *id_priv)
1058{
1059        struct ib_qp_attr qp_attr;
1060        int ret;
1061
1062        mutex_lock(&id_priv->qp_mutex);
1063        if (!id_priv->id.qp) {
1064                ret = 0;
1065                goto out;
1066        }
1067
1068        qp_attr.qp_state = IB_QPS_ERR;
1069        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
1070out:
1071        mutex_unlock(&id_priv->qp_mutex);
1072        return ret;
1073}
1074
1075static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
1076                               struct ib_qp_attr *qp_attr, int *qp_attr_mask)
1077{
1078        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
1079        int ret;
1080        u16 pkey;
1081
1082        if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
1083                pkey = 0xffff;
1084        else
1085                pkey = ib_addr_get_pkey(dev_addr);
1086
1087        ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
1088                                  pkey, &qp_attr->pkey_index);
1089        if (ret)
1090                return ret;
1091
1092        qp_attr->port_num = id_priv->id.port_num;
1093        *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
1094
1095        if (id_priv->id.qp_type == IB_QPT_UD) {
1096                ret = cma_set_qkey(id_priv, 0);
1097                if (ret)
1098                        return ret;
1099
1100                qp_attr->qkey = id_priv->qkey;
1101                *qp_attr_mask |= IB_QP_QKEY;
1102        } else {
1103                qp_attr->qp_access_flags = 0;
1104                *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
1105        }
1106        return 0;
1107}
1108
1109int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
1110                       int *qp_attr_mask)
1111{
1112        struct rdma_id_private *id_priv;
1113        int ret = 0;
1114
1115        id_priv = container_of(id, struct rdma_id_private, id);
1116        if (rdma_cap_ib_cm(id->device, id->port_num)) {
1117                if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
1118                        ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
1119                else
1120                        ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
1121                                                 qp_attr_mask);
1122
1123                if (qp_attr->qp_state == IB_QPS_RTR)
1124                        qp_attr->rq_psn = id_priv->seq_num;
1125        } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
1126                if (!id_priv->cm_id.iw) {
1127                        qp_attr->qp_access_flags = 0;
1128                        *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
1129                } else
1130                        ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
1131                                                 qp_attr_mask);
1132                qp_attr->port_num = id_priv->id.port_num;
1133                *qp_attr_mask |= IB_QP_PORT;
1134        } else
1135                ret = -ENOSYS;
1136
1137        if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
1138                qp_attr->timeout = id_priv->timeout;
1139
1140        return ret;
1141}
1142EXPORT_SYMBOL(rdma_init_qp_attr);
1143
1144static inline bool cma_zero_addr(const struct sockaddr *addr)
1145{
1146        switch (addr->sa_family) {
1147        case AF_INET:
1148                return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
1149        case AF_INET6:
1150                return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr);
1151        case AF_IB:
1152                return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr);
1153        default:
1154                return false;
1155        }
1156}
1157
1158static inline bool cma_loopback_addr(const struct sockaddr *addr)
1159{
1160        switch (addr->sa_family) {
1161        case AF_INET:
1162                return ipv4_is_loopback(
1163                        ((struct sockaddr_in *)addr)->sin_addr.s_addr);
1164        case AF_INET6:
1165                return ipv6_addr_loopback(
1166                        &((struct sockaddr_in6 *)addr)->sin6_addr);
1167        case AF_IB:
1168                return ib_addr_loopback(
1169                        &((struct sockaddr_ib *)addr)->sib_addr);
1170        default:
1171                return false;
1172        }
1173}
1174
1175static inline bool cma_any_addr(const struct sockaddr *addr)
1176{
1177        return cma_zero_addr(addr) || cma_loopback_addr(addr);
1178}
1179
1180static int cma_addr_cmp(const struct sockaddr *src, const struct sockaddr *dst)
1181{
1182        if (src->sa_family != dst->sa_family)
1183                return -1;
1184
1185        switch (src->sa_family) {
1186        case AF_INET:
1187                return ((struct sockaddr_in *)src)->sin_addr.s_addr !=
1188                       ((struct sockaddr_in *)dst)->sin_addr.s_addr;
1189        case AF_INET6: {
1190                struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)src;
1191                struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst;
1192                bool link_local;
1193
1194                if (ipv6_addr_cmp(&src_addr6->sin6_addr,
1195                                          &dst_addr6->sin6_addr))
1196                        return 1;
1197                link_local = ipv6_addr_type(&dst_addr6->sin6_addr) &
1198                             IPV6_ADDR_LINKLOCAL;
1199                /* Link local must match their scope_ids */
1200                return link_local ? (src_addr6->sin6_scope_id !=
1201                                     dst_addr6->sin6_scope_id) :
1202                                    0;
1203        }
1204
1205        default:
1206                return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
1207                                   &((struct sockaddr_ib *) dst)->sib_addr);
1208        }
1209}
1210
1211static __be16 cma_port(const struct sockaddr *addr)
1212{
1213        struct sockaddr_ib *sib;
1214
1215        switch (addr->sa_family) {
1216        case AF_INET:
1217                return ((struct sockaddr_in *) addr)->sin_port;
1218        case AF_INET6:
1219                return ((struct sockaddr_in6 *) addr)->sin6_port;
1220        case AF_IB:
1221                sib = (struct sockaddr_ib *) addr;
1222                return htons((u16) (be64_to_cpu(sib->sib_sid) &
1223                                    be64_to_cpu(sib->sib_sid_mask)));
1224        default:
1225                return 0;
1226        }
1227}
1228
1229static inline int cma_any_port(const struct sockaddr *addr)
1230{
1231        return !cma_port(addr);
1232}
1233
1234static void cma_save_ib_info(struct sockaddr *src_addr,
1235                             struct sockaddr *dst_addr,
1236                             const struct rdma_cm_id *listen_id,
1237                             const struct sa_path_rec *path)
1238{
1239        struct sockaddr_ib *listen_ib, *ib;
1240
1241        listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
1242        if (src_addr) {
1243                ib = (struct sockaddr_ib *)src_addr;
1244                ib->sib_family = AF_IB;
1245                if (path) {
1246                        ib->sib_pkey = path->pkey;
1247                        ib->sib_flowinfo = path->flow_label;
1248                        memcpy(&ib->sib_addr, &path->sgid, 16);
1249                        ib->sib_sid = path->service_id;
1250                        ib->sib_scope_id = 0;
1251                } else {
1252                        ib->sib_pkey = listen_ib->sib_pkey;
1253                        ib->sib_flowinfo = listen_ib->sib_flowinfo;
1254                        ib->sib_addr = listen_ib->sib_addr;
1255                        ib->sib_sid = listen_ib->sib_sid;
1256                        ib->sib_scope_id = listen_ib->sib_scope_id;
1257                }
1258                ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
1259        }
1260        if (dst_addr) {
1261                ib = (struct sockaddr_ib *)dst_addr;
1262                ib->sib_family = AF_IB;
1263                if (path) {
1264                        ib->sib_pkey = path->pkey;
1265                        ib->sib_flowinfo = path->flow_label;
1266                        memcpy(&ib->sib_addr, &path->dgid, 16);
1267                }
1268        }
1269}
1270
1271static void cma_save_ip4_info(struct sockaddr_in *src_addr,
1272                              struct sockaddr_in *dst_addr,
1273                              struct cma_hdr *hdr,
1274                              __be16 local_port)
1275{
1276        if (src_addr) {
1277                *src_addr = (struct sockaddr_in) {
1278                        .sin_family = AF_INET,
1279                        .sin_addr.s_addr = hdr->dst_addr.ip4.addr,
1280                        .sin_port = local_port,
1281                };
1282        }
1283
1284        if (dst_addr) {
1285                *dst_addr = (struct sockaddr_in) {
1286                        .sin_family = AF_INET,
1287                        .sin_addr.s_addr = hdr->src_addr.ip4.addr,
1288                        .sin_port = hdr->port,
1289                };
1290        }
1291}
1292
1293static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
1294                              struct sockaddr_in6 *dst_addr,
1295                              struct cma_hdr *hdr,
1296                              __be16 local_port)
1297{
1298        if (src_addr) {
1299                *src_addr = (struct sockaddr_in6) {
1300                        .sin6_family = AF_INET6,
1301                        .sin6_addr = hdr->dst_addr.ip6,
1302                        .sin6_port = local_port,
1303                };
1304        }
1305
1306        if (dst_addr) {
1307                *dst_addr = (struct sockaddr_in6) {
1308                        .sin6_family = AF_INET6,
1309                        .sin6_addr = hdr->src_addr.ip6,
1310                        .sin6_port = hdr->port,
1311                };
1312        }
1313}
1314
1315static u16 cma_port_from_service_id(__be64 service_id)
1316{
1317        return (u16)be64_to_cpu(service_id);
1318}
1319
1320static int cma_save_ip_info(struct sockaddr *src_addr,
1321                            struct sockaddr *dst_addr,
1322                            const struct ib_cm_event *ib_event,
1323                            __be64 service_id)
1324{
1325        struct cma_hdr *hdr;
1326        __be16 port;
1327
1328        hdr = ib_event->private_data;
1329        if (hdr->cma_version != CMA_VERSION)
1330                return -EINVAL;
1331
1332        port = htons(cma_port_from_service_id(service_id));
1333
1334        switch (cma_get_ip_ver(hdr)) {
1335        case 4:
1336                cma_save_ip4_info((struct sockaddr_in *)src_addr,
1337                                  (struct sockaddr_in *)dst_addr, hdr, port);
1338                break;
1339        case 6:
1340                cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
1341                                  (struct sockaddr_in6 *)dst_addr, hdr, port);
1342                break;
1343        default:
1344                return -EAFNOSUPPORT;
1345        }
1346
1347        return 0;
1348}
1349
1350static int cma_save_net_info(struct sockaddr *src_addr,
1351                             struct sockaddr *dst_addr,
1352                             const struct rdma_cm_id *listen_id,
1353                             const struct ib_cm_event *ib_event,
1354                             sa_family_t sa_family, __be64 service_id)
1355{
1356        if (sa_family == AF_IB) {
1357                if (ib_event->event == IB_CM_REQ_RECEIVED)
1358                        cma_save_ib_info(src_addr, dst_addr, listen_id,
1359                                         ib_event->param.req_rcvd.primary_path);
1360                else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
1361                        cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
1362                return 0;
1363        }
1364
1365        return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
1366}
1367
1368static int cma_save_req_info(const struct ib_cm_event *ib_event,
1369                             struct cma_req_info *req)
1370{
1371        const struct ib_cm_req_event_param *req_param =
1372                &ib_event->param.req_rcvd;
1373        const struct ib_cm_sidr_req_event_param *sidr_param =
1374                &ib_event->param.sidr_req_rcvd;
1375
1376        switch (ib_event->event) {
1377        case IB_CM_REQ_RECEIVED:
1378                req->device     = req_param->listen_id->device;
1379                req->port       = req_param->port;
1380                memcpy(&req->local_gid, &req_param->primary_path->sgid,
1381                       sizeof(req->local_gid));
1382                req->has_gid    = true;
1383                req->service_id = req_param->primary_path->service_id;
1384                req->pkey       = be16_to_cpu(req_param->primary_path->pkey);
1385                if (req->pkey != req_param->bth_pkey)
1386                        pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
1387                                            "RDMA CMA: in the future this may cause the request to be dropped\n",
1388                                            req_param->bth_pkey, req->pkey);
1389                break;
1390        case IB_CM_SIDR_REQ_RECEIVED:
1391                req->device     = sidr_param->listen_id->device;
1392                req->port       = sidr_param->port;
1393                req->has_gid    = false;
1394                req->service_id = sidr_param->service_id;
1395                req->pkey       = sidr_param->pkey;
1396                if (req->pkey != sidr_param->bth_pkey)
1397                        pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
1398                                            "RDMA CMA: in the future this may cause the request to be dropped\n",
1399                                            sidr_param->bth_pkey, req->pkey);
1400                break;
1401        default:
1402                return -EINVAL;
1403        }
1404
1405        return 0;
1406}
1407
1408static bool validate_ipv4_net_dev(struct net_device *net_dev,
1409                                  const struct sockaddr_in *dst_addr,
1410                                  const struct sockaddr_in *src_addr)
1411{
1412        __be32 daddr = dst_addr->sin_addr.s_addr,
1413               saddr = src_addr->sin_addr.s_addr;
1414        struct fib_result res;
1415        struct flowi4 fl4;
1416        int err;
1417        bool ret;
1418
1419        if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1420            ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
1421            ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
1422            ipv4_is_loopback(saddr))
1423                return false;
1424
1425        memset(&fl4, 0, sizeof(fl4));
1426        fl4.flowi4_iif = net_dev->ifindex;
1427        fl4.daddr = daddr;
1428        fl4.saddr = saddr;
1429
1430        rcu_read_lock();
1431        err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
1432        ret = err == 0 && FIB_RES_DEV(res) == net_dev;
1433        rcu_read_unlock();
1434
1435        return ret;
1436}
1437
1438static bool validate_ipv6_net_dev(struct net_device *net_dev,
1439                                  const struct sockaddr_in6 *dst_addr,
1440                                  const struct sockaddr_in6 *src_addr)
1441{
1442#if IS_ENABLED(CONFIG_IPV6)
1443        const int strict = ipv6_addr_type(&dst_addr->sin6_addr) &
1444                           IPV6_ADDR_LINKLOCAL;
1445        struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
1446                                         &src_addr->sin6_addr, net_dev->ifindex,
1447                                         NULL, strict);
1448        bool ret;
1449
1450        if (!rt)
1451                return false;
1452
1453        ret = rt->rt6i_idev->dev == net_dev;
1454        ip6_rt_put(rt);
1455
1456        return ret;
1457#else
1458        return false;
1459#endif
1460}
1461
1462static bool validate_net_dev(struct net_device *net_dev,
1463                             const struct sockaddr *daddr,
1464                             const struct sockaddr *saddr)
1465{
1466        const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
1467        const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
1468        const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
1469        const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;
1470
1471        switch (daddr->sa_family) {
1472        case AF_INET:
1473                return saddr->sa_family == AF_INET &&
1474                       validate_ipv4_net_dev(net_dev, daddr4, saddr4);
1475
1476        case AF_INET6:
1477                return saddr->sa_family == AF_INET6 &&
1478                       validate_ipv6_net_dev(net_dev, daddr6, saddr6);
1479
1480        default:
1481                return false;
1482        }
1483}
1484
1485static struct net_device *
1486roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
1487{
1488        const struct ib_gid_attr *sgid_attr = NULL;
1489        struct net_device *ndev;
1490
1491        if (ib_event->event == IB_CM_REQ_RECEIVED)
1492                sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
1493        else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
1494                sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr;
1495
1496        if (!sgid_attr)
1497                return NULL;
1498
1499        rcu_read_lock();
1500        ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr);
1501        if (IS_ERR(ndev))
1502                ndev = NULL;
1503        else
1504                dev_hold(ndev);
1505        rcu_read_unlock();
1506        return ndev;
1507}
1508
1509static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
1510                                          struct cma_req_info *req)
1511{
1512        struct sockaddr *listen_addr =
1513                        (struct sockaddr *)&req->listen_addr_storage;
1514        struct sockaddr *src_addr = (struct sockaddr *)&req->src_addr_storage;
1515        struct net_device *net_dev;
1516        const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
1517        int err;
1518
1519        err = cma_save_ip_info(listen_addr, src_addr, ib_event,
1520                               req->service_id);
1521        if (err)
1522                return ERR_PTR(err);
1523
1524        if (rdma_protocol_roce(req->device, req->port))
1525                net_dev = roce_get_net_dev_by_cm_event(ib_event);
1526        else
1527                net_dev = ib_get_net_dev_by_params(req->device, req->port,
1528                                                   req->pkey,
1529                                                   gid, listen_addr);
1530        if (!net_dev)
1531                return ERR_PTR(-ENODEV);
1532
1533        return net_dev;
1534}
1535
1536static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
1537{
1538        return (be64_to_cpu(service_id) >> 16) & 0xffff;
1539}
1540
1541static bool cma_match_private_data(struct rdma_id_private *id_priv,
1542                                   const struct cma_hdr *hdr)
1543{
1544        struct sockaddr *addr = cma_src_addr(id_priv);
1545        __be32 ip4_addr;
1546        struct in6_addr ip6_addr;
1547
1548        if (cma_any_addr(addr) && !id_priv->afonly)
1549                return true;
1550
1551        switch (addr->sa_family) {
1552        case AF_INET:
1553                ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
1554                if (cma_get_ip_ver(hdr) != 4)
1555                        return false;
1556                if (!cma_any_addr(addr) &&
1557                    hdr->dst_addr.ip4.addr != ip4_addr)
1558                        return false;
1559                break;
1560        case AF_INET6:
1561                ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
1562                if (cma_get_ip_ver(hdr) != 6)
1563                        return false;
1564                if (!cma_any_addr(addr) &&
1565                    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
1566                        return false;
1567                break;
1568        case AF_IB:
1569                return true;
1570        default:
1571                return false;
1572        }
1573
1574        return true;
1575}
1576
1577static bool cma_protocol_roce(const struct rdma_cm_id *id)
1578{
1579        struct ib_device *device = id->device;
1580        const int port_num = id->port_num ?: rdma_start_port(device);
1581
1582        return rdma_protocol_roce(device, port_num);
1583}
1584
1585static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
1586{
1587        const struct sockaddr *daddr =
1588                        (const struct sockaddr *)&req->listen_addr_storage;
1589        const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
1590
1591        /* Returns true if the req is for IPv6 link local */
1592        return (daddr->sa_family == AF_INET6 &&
1593                (ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
1594}
1595
1596static bool cma_match_net_dev(const struct rdma_cm_id *id,
1597                              const struct net_device *net_dev,
1598                              const struct cma_req_info *req)
1599{
1600        const struct rdma_addr *addr = &id->route.addr;
1601
1602        if (!net_dev)
1603                /* This request is an AF_IB request */
1604                return (!id->port_num || id->port_num == req->port) &&
1605                       (addr->src_addr.ss_family == AF_IB);
1606
1607        /*
1608         * If the request is not for IPv6 link local, allow matching
1609         * request to any netdevice of the one or multiport rdma device.
1610         */
1611        if (!cma_is_req_ipv6_ll(req))
1612                return true;
1613        /*
1614         * Net namespaces must match, and if the listner is listening
1615         * on a specific netdevice than netdevice must match as well.
1616         */
1617        if (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
1618            (!!addr->dev_addr.bound_dev_if ==
1619             (addr->dev_addr.bound_dev_if == net_dev->ifindex)))
1620                return true;
1621        else
1622                return false;
1623}
1624
1625static struct rdma_id_private *cma_find_listener(
1626                const struct rdma_bind_list *bind_list,
1627                const struct ib_cm_id *cm_id,
1628                const struct ib_cm_event *ib_event,
1629                const struct cma_req_info *req,
1630                const struct net_device *net_dev)
1631{
1632        struct rdma_id_private *id_priv, *id_priv_dev;
1633
1634        if (!bind_list)
1635                return ERR_PTR(-EINVAL);
1636
1637        hlist_for_each_entry(id_priv, &bind_list->owners, node) {
1638                if (cma_match_private_data(id_priv, ib_event->private_data)) {
1639                        if (id_priv->id.device == cm_id->device &&
1640                            cma_match_net_dev(&id_priv->id, net_dev, req))
1641                                return id_priv;
1642                        list_for_each_entry(id_priv_dev,
1643                                            &id_priv->listen_list,
1644                                            listen_list) {
1645                                if (id_priv_dev->id.device == cm_id->device &&
1646                                    cma_match_net_dev(&id_priv_dev->id,
1647                                                      net_dev, req))
1648                                        return id_priv_dev;
1649                        }
1650                }
1651        }
1652
1653        return ERR_PTR(-EINVAL);
1654}
1655
1656static struct rdma_id_private *
1657cma_ib_id_from_event(struct ib_cm_id *cm_id,
1658                     const struct ib_cm_event *ib_event,
1659                     struct cma_req_info *req,
1660                     struct net_device **net_dev)
1661{
1662        struct rdma_bind_list *bind_list;
1663        struct rdma_id_private *id_priv;
1664        int err;
1665
1666        err = cma_save_req_info(ib_event, req);
1667        if (err)
1668                return ERR_PTR(err);
1669
1670        *net_dev = cma_get_net_dev(ib_event, req);
1671        if (IS_ERR(*net_dev)) {
1672                if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
1673                        /* Assuming the protocol is AF_IB */
1674                        *net_dev = NULL;
1675                } else {
1676                        return ERR_CAST(*net_dev);
1677                }
1678        }
1679
1680        /*
1681         * Net namespace might be getting deleted while route lookup,
1682         * cm_id lookup is in progress. Therefore, perform netdevice
1683         * validation, cm_id lookup under rcu lock.
1684         * RCU lock along with netdevice state check, synchronizes with
1685         * netdevice migrating to different net namespace and also avoids
1686         * case where net namespace doesn't get deleted while lookup is in
1687         * progress.
1688         * If the device state is not IFF_UP, its properties such as ifindex
1689         * and nd_net cannot be trusted to remain valid without rcu lock.
1690         * net/core/dev.c change_net_namespace() ensures to synchronize with
1691         * ongoing operations on net device after device is closed using
1692         * synchronize_net().
1693         */
1694        rcu_read_lock();
1695        if (*net_dev) {
1696                /*
1697                 * If netdevice is down, it is likely that it is administratively
1698                 * down or it might be migrating to different namespace.
1699                 * In that case avoid further processing, as the net namespace
1700                 * or ifindex may change.
1701                 */
1702                if (((*net_dev)->flags & IFF_UP) == 0) {
1703                        id_priv = ERR_PTR(-EHOSTUNREACH);
1704                        goto err;
1705                }
1706
1707                if (!validate_net_dev(*net_dev,
1708                                 (struct sockaddr *)&req->listen_addr_storage,
1709                                 (struct sockaddr *)&req->src_addr_storage)) {
1710                        id_priv = ERR_PTR(-EHOSTUNREACH);
1711                        goto err;
1712                }
1713        }
1714
1715        bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
1716                                rdma_ps_from_service_id(req->service_id),
1717                                cma_port_from_service_id(req->service_id));
1718        id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
1719err:
1720        rcu_read_unlock();
1721        if (IS_ERR(id_priv) && *net_dev) {
1722                dev_put(*net_dev);
1723                *net_dev = NULL;
1724        }
1725        return id_priv;
1726}
1727
1728static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv)
1729{
1730        return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
1731}
1732
1733static void cma_cancel_route(struct rdma_id_private *id_priv)
1734{
1735        if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) {
1736                if (id_priv->query)
1737                        ib_sa_cancel_query(id_priv->query_id, id_priv->query);
1738        }
1739}
1740
1741static void cma_cancel_listens(struct rdma_id_private *id_priv)
1742{
1743        struct rdma_id_private *dev_id_priv;
1744
1745        /*
1746         * Remove from listen_any_list to prevent added devices from spawning
1747         * additional listen requests.
1748         */
1749        mutex_lock(&lock);
1750        list_del(&id_priv->list);
1751
1752        while (!list_empty(&id_priv->listen_list)) {
1753                dev_id_priv = list_entry(id_priv->listen_list.next,
1754                                         struct rdma_id_private, listen_list);
1755                /* sync with device removal to avoid duplicate destruction */
1756                list_del_init(&dev_id_priv->list);
1757                list_del(&dev_id_priv->listen_list);
1758                mutex_unlock(&lock);
1759
1760                rdma_destroy_id(&dev_id_priv->id);
1761                mutex_lock(&lock);
1762        }
1763        mutex_unlock(&lock);
1764}
1765
1766static void cma_cancel_operation(struct rdma_id_private *id_priv,
1767                                 enum rdma_cm_state state)
1768{
1769        switch (state) {
1770        case RDMA_CM_ADDR_QUERY:
1771                rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
1772                break;
1773        case RDMA_CM_ROUTE_QUERY:
1774                cma_cancel_route(id_priv);
1775                break;
1776        case RDMA_CM_LISTEN:
1777                if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
1778                        cma_cancel_listens(id_priv);
1779                break;
1780        default:
1781                break;
1782        }
1783}
1784
1785static void cma_release_port(struct rdma_id_private *id_priv)
1786{
1787        struct rdma_bind_list *bind_list = id_priv->bind_list;
1788        struct net *net = id_priv->id.route.addr.dev_addr.net;
1789
1790        if (!bind_list)
1791                return;
1792
1793        mutex_lock(&lock);
1794        hlist_del(&id_priv->node);
1795        if (hlist_empty(&bind_list->owners)) {
1796                cma_ps_remove(net, bind_list->ps, bind_list->port);
1797                kfree(bind_list);
1798        }
1799        mutex_unlock(&lock);
1800}
1801
1802static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
1803                                    struct cma_multicast *mc)
1804{
1805        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
1806        struct net_device *ndev = NULL;
1807
1808        if (dev_addr->bound_dev_if)
1809                ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
1810        if (ndev) {
1811                cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
1812                dev_put(ndev);
1813        }
1814        kref_put(&mc->mcref, release_mc);
1815}
1816
1817static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
1818{
1819        struct cma_multicast *mc;
1820
1821        while (!list_empty(&id_priv->mc_list)) {
1822                mc = container_of(id_priv->mc_list.next,
1823                                  struct cma_multicast, list);
1824                list_del(&mc->list);
1825                if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
1826                                      id_priv->id.port_num)) {
1827                        ib_sa_free_multicast(mc->multicast.ib);
1828                        kfree(mc);
1829                } else {
1830                        cma_leave_roce_mc_group(id_priv, mc);
1831                }
1832        }
1833}
1834
1835void rdma_destroy_id(struct rdma_cm_id *id)
1836{
1837        struct rdma_id_private *id_priv;
1838        enum rdma_cm_state state;
1839
1840        id_priv = container_of(id, struct rdma_id_private, id);
1841        state = cma_exch(id_priv, RDMA_CM_DESTROYING);
1842        cma_cancel_operation(id_priv, state);
1843
1844        /*
1845         * Wait for any active callback to finish.  New callbacks will find
1846         * the id_priv state set to destroying and abort.
1847         */
1848        mutex_lock(&id_priv->handler_mutex);
1849        mutex_unlock(&id_priv->handler_mutex);
1850
1851        rdma_restrack_del(&id_priv->res);
1852        if (id_priv->cma_dev) {
1853                if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
1854                        if (id_priv->cm_id.ib)
1855                                ib_destroy_cm_id(id_priv->cm_id.ib);
1856                } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
1857                        if (id_priv->cm_id.iw)
1858                                iw_destroy_cm_id(id_priv->cm_id.iw);
1859                }
1860                cma_leave_mc_groups(id_priv);
1861                cma_release_dev(id_priv);
1862        }
1863
1864        cma_release_port(id_priv);
1865        cma_deref_id(id_priv);
1866        wait_for_completion(&id_priv->comp);
1867
1868        if (id_priv->internal_id)
1869                cma_deref_id(id_priv->id.context);
1870
1871        kfree(id_priv->id.route.path_rec);
1872
1873        if (id_priv->id.route.addr.dev_addr.sgid_attr)
1874                rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
1875
1876        put_net(id_priv->id.route.addr.dev_addr.net);
1877        kfree(id_priv);
1878}
1879EXPORT_SYMBOL(rdma_destroy_id);
1880
1881static int cma_rep_recv(struct rdma_id_private *id_priv)
1882{
1883        int ret;
1884
1885        ret = cma_modify_qp_rtr(id_priv, NULL);
1886        if (ret)
1887                goto reject;
1888
1889        ret = cma_modify_qp_rts(id_priv, NULL);
1890        if (ret)
1891                goto reject;
1892
1893        ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
1894        if (ret)
1895                goto reject;
1896
1897        return 0;
1898reject:
1899        pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
1900        cma_modify_qp_err(id_priv);
1901        ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
1902                       NULL, 0, NULL, 0);
1903        return ret;
1904}
1905
1906static void cma_set_rep_event_data(struct rdma_cm_event *event,
1907                                   const struct ib_cm_rep_event_param *rep_data,
1908                                   void *private_data)
1909{
1910        event->param.conn.private_data = private_data;
1911        event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
1912        event->param.conn.responder_resources = rep_data->responder_resources;
1913        event->param.conn.initiator_depth = rep_data->initiator_depth;
1914        event->param.conn.flow_control = rep_data->flow_control;
1915        event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
1916        event->param.conn.srq = rep_data->srq;
1917        event->param.conn.qp_num = rep_data->remote_qpn;
1918}
1919
1920static int cma_ib_handler(struct ib_cm_id *cm_id,
1921                          const struct ib_cm_event *ib_event)
1922{
1923        struct rdma_id_private *id_priv = cm_id->context;
1924        struct rdma_cm_event event = {};
1925        int ret = 0;
1926
1927        mutex_lock(&id_priv->handler_mutex);
1928        if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
1929             id_priv->state != RDMA_CM_CONNECT) ||
1930            (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
1931             id_priv->state != RDMA_CM_DISCONNECT))
1932                goto out;
1933
1934        switch (ib_event->event) {
1935        case IB_CM_REQ_ERROR:
1936        case IB_CM_REP_ERROR:
1937                event.event = RDMA_CM_EVENT_UNREACHABLE;
1938                event.status = -ETIMEDOUT;
1939                break;
1940        case IB_CM_REP_RECEIVED:
1941                if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
1942                    (id_priv->id.qp_type != IB_QPT_UD))
1943                        ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1944                if (id_priv->id.qp) {
1945                        event.status = cma_rep_recv(id_priv);
1946                        event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
1947                                                     RDMA_CM_EVENT_ESTABLISHED;
1948                } else {
1949                        event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
1950                }
1951                cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
1952                                       ib_event->private_data);
1953                break;
1954        case IB_CM_RTU_RECEIVED:
1955        case IB_CM_USER_ESTABLISHED:
1956                event.event = RDMA_CM_EVENT_ESTABLISHED;
1957                break;
1958        case IB_CM_DREQ_ERROR:
1959                event.status = -ETIMEDOUT; /* fall through */
1960        case IB_CM_DREQ_RECEIVED:
1961        case IB_CM_DREP_RECEIVED:
1962                if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
1963                                   RDMA_CM_DISCONNECT))
1964                        goto out;
1965                event.event = RDMA_CM_EVENT_DISCONNECTED;
1966                break;
1967        case IB_CM_TIMEWAIT_EXIT:
1968                event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
1969                break;
1970        case IB_CM_MRA_RECEIVED:
1971                /* ignore event */
1972                goto out;
1973        case IB_CM_REJ_RECEIVED:
1974                pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
1975                                                                                ib_event->param.rej_rcvd.reason));
1976                cma_modify_qp_err(id_priv);
1977                event.status = ib_event->param.rej_rcvd.reason;
1978                event.event = RDMA_CM_EVENT_REJECTED;
1979                event.param.conn.private_data = ib_event->private_data;
1980                event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
1981                break;
1982        default:
1983                pr_err("RDMA CMA: unexpected IB CM event: %d\n",
1984                       ib_event->event);
1985                goto out;
1986        }
1987
1988        ret = id_priv->id.event_handler(&id_priv->id, &event);
1989        if (ret) {
1990                /* Destroy the CM ID by returning a non-zero value. */
1991                id_priv->cm_id.ib = NULL;
1992                cma_exch(id_priv, RDMA_CM_DESTROYING);
1993                mutex_unlock(&id_priv->handler_mutex);
1994                rdma_destroy_id(&id_priv->id);
1995                return ret;
1996        }
1997out:
1998        mutex_unlock(&id_priv->handler_mutex);
1999        return ret;
2000}
2001
2002static struct rdma_id_private *
2003cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
2004                   const struct ib_cm_event *ib_event,
2005                   struct net_device *net_dev)
2006{
2007        struct rdma_id_private *listen_id_priv;
2008        struct rdma_id_private *id_priv;
2009        struct rdma_cm_id *id;
2010        struct rdma_route *rt;
2011        const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
2012        struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
2013        const __be64 service_id =
2014                ib_event->param.req_rcvd.primary_path->service_id;
2015        int ret;
2016
2017        listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
2018        id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
2019                            listen_id->event_handler, listen_id->context,
2020                            listen_id->ps, ib_event->param.req_rcvd.qp_type,
2021                            listen_id_priv->res.kern_name);
2022        if (IS_ERR(id))
2023                return NULL;
2024
2025        id_priv = container_of(id, struct rdma_id_private, id);
2026        if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
2027                              (struct sockaddr *)&id->route.addr.dst_addr,
2028                              listen_id, ib_event, ss_family, service_id))
2029                goto err;
2030
2031        rt = &id->route;
2032        rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
2033        rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec),
2034                                     GFP_KERNEL);
2035        if (!rt->path_rec)
2036                goto err;
2037
2038        rt->path_rec[0] = *path;
2039        if (rt->num_paths == 2)
2040                rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
2041
2042        if (net_dev) {
2043                rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
2044        } else {
2045                if (!cma_protocol_roce(listen_id) &&
2046                    cma_any_addr(cma_src_addr(id_priv))) {
2047                        rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
2048                        rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
2049                        ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
2050                } else if (!cma_any_addr(cma_src_addr(id_priv))) {
2051                        ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
2052                        if (ret)
2053                                goto err;
2054                }
2055        }
2056        rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
2057
2058        id_priv->state = RDMA_CM_CONNECT;
2059        return id_priv;
2060
2061err:
2062        rdma_destroy_id(id);
2063        return NULL;
2064}
2065
2066static struct rdma_id_private *
2067cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
2068                  const struct ib_cm_event *ib_event,
2069                  struct net_device *net_dev)
2070{
2071        const struct rdma_id_private *listen_id_priv;
2072        struct rdma_id_private *id_priv;
2073        struct rdma_cm_id *id;
2074        const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
2075        struct net *net = listen_id->route.addr.dev_addr.net;
2076        int ret;
2077
2078        listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
2079        id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
2080                              listen_id->ps, IB_QPT_UD,
2081                              listen_id_priv->res.kern_name);
2082        if (IS_ERR(id))
2083                return NULL;
2084
2085        id_priv = container_of(id, struct rdma_id_private, id);
2086        if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
2087                              (struct sockaddr *)&id->route.addr.dst_addr,
2088                              listen_id, ib_event, ss_family,
2089                              ib_event->param.sidr_req_rcvd.service_id))
2090                goto err;
2091
2092        if (net_dev) {
2093                rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
2094        } else {
2095                if (!cma_any_addr(cma_src_addr(id_priv))) {
2096                        ret = cma_translate_addr(cma_src_addr(id_priv),
2097                                                 &id->route.addr.dev_addr);
2098                        if (ret)
2099                                goto err;
2100                }
2101        }
2102
2103        id_priv->state = RDMA_CM_CONNECT;
2104        return id_priv;
2105err:
2106        rdma_destroy_id(id);
2107        return NULL;
2108}
2109
2110static void cma_set_req_event_data(struct rdma_cm_event *event,
2111                                   const struct ib_cm_req_event_param *req_data,
2112                                   void *private_data, int offset)
2113{
2114        event->param.conn.private_data = private_data + offset;
2115        event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
2116        event->param.conn.responder_resources = req_data->responder_resources;
2117        event->param.conn.initiator_depth = req_data->initiator_depth;
2118        event->param.conn.flow_control = req_data->flow_control;
2119        event->param.conn.retry_count = req_data->retry_count;
2120        event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
2121        event->param.conn.srq = req_data->srq;
2122        event->param.conn.qp_num = req_data->remote_qpn;
2123}
2124
2125static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
2126                                    const struct ib_cm_event *ib_event)
2127{
2128        return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
2129                 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
2130                ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
2131                 (id->qp_type == IB_QPT_UD)) ||
2132                (!id->qp_type));
2133}
2134
2135static int cma_ib_req_handler(struct ib_cm_id *cm_id,
2136                              const struct ib_cm_event *ib_event)
2137{
2138        struct rdma_id_private *listen_id, *conn_id = NULL;
2139        struct rdma_cm_event event = {};
2140        struct cma_req_info req = {};
2141        struct net_device *net_dev;
2142        u8 offset;
2143        int ret;
2144
2145        listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
2146        if (IS_ERR(listen_id))
2147                return PTR_ERR(listen_id);
2148
2149        if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
2150                ret = -EINVAL;
2151                goto net_dev_put;
2152        }
2153
2154        mutex_lock(&listen_id->handler_mutex);
2155        if (listen_id->state != RDMA_CM_LISTEN) {
2156                ret = -ECONNABORTED;
2157                goto err1;
2158        }
2159
2160        offset = cma_user_data_offset(listen_id);
2161        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
2162        if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
2163                conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev);
2164                event.param.ud.private_data = ib_event->private_data + offset;
2165                event.param.ud.private_data_len =
2166                                IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
2167        } else {
2168                conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev);
2169                cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
2170                                       ib_event->private_data, offset);
2171        }
2172        if (!conn_id) {
2173                ret = -ENOMEM;
2174                goto err1;
2175        }
2176
2177        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2178        ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
2179        if (ret)
2180                goto err2;
2181
2182        conn_id->cm_id.ib = cm_id;
2183        cm_id->context = conn_id;
2184        cm_id->cm_handler = cma_ib_handler;
2185
2186        /*
2187         * Protect against the user destroying conn_id from another thread
2188         * until we're done accessing it.
2189         */
2190        atomic_inc(&conn_id->refcount);
2191        ret = conn_id->id.event_handler(&conn_id->id, &event);
2192        if (ret)
2193                goto err3;
2194        /*
2195         * Acquire mutex to prevent user executing rdma_destroy_id()
2196         * while we're accessing the cm_id.
2197         */
2198        mutex_lock(&lock);
2199        if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
2200            (conn_id->id.qp_type != IB_QPT_UD))
2201                ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
2202        mutex_unlock(&lock);
2203        mutex_unlock(&conn_id->handler_mutex);
2204        mutex_unlock(&listen_id->handler_mutex);
2205        cma_deref_id(conn_id);
2206        if (net_dev)
2207                dev_put(net_dev);
2208        return 0;
2209
2210err3:
2211        cma_deref_id(conn_id);
2212        /* Destroy the CM ID by returning a non-zero value. */
2213        conn_id->cm_id.ib = NULL;
2214err2:
2215        cma_exch(conn_id, RDMA_CM_DESTROYING);
2216        mutex_unlock(&conn_id->handler_mutex);
2217err1:
2218        mutex_unlock(&listen_id->handler_mutex);
2219        if (conn_id)
2220                rdma_destroy_id(&conn_id->id);
2221
2222net_dev_put:
2223        if (net_dev)
2224                dev_put(net_dev);
2225
2226        return ret;
2227}
2228
2229__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
2230{
2231        if (addr->sa_family == AF_IB)
2232                return ((struct sockaddr_ib *) addr)->sib_sid;
2233
2234        return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
2235}
2236EXPORT_SYMBOL(rdma_get_service_id);
2237
2238void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
2239                    union ib_gid *dgid)
2240{
2241        struct rdma_addr *addr = &cm_id->route.addr;
2242
2243        if (!cm_id->device) {
2244                if (sgid)
2245                        memset(sgid, 0, sizeof(*sgid));
2246                if (dgid)
2247                        memset(dgid, 0, sizeof(*dgid));
2248                return;
2249        }
2250
2251        if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) {
2252                if (sgid)
2253                        rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid);
2254                if (dgid)
2255                        rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid);
2256        } else {
2257                if (sgid)
2258                        rdma_addr_get_sgid(&addr->dev_addr, sgid);
2259                if (dgid)
2260                        rdma_addr_get_dgid(&addr->dev_addr, dgid);
2261        }
2262}
2263EXPORT_SYMBOL(rdma_read_gids);
2264
2265static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
2266{
2267        struct rdma_id_private *id_priv = iw_id->context;
2268        struct rdma_cm_event event = {};
2269        int ret = 0;
2270        struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2271        struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
2272
2273        mutex_lock(&id_priv->handler_mutex);
2274        if (id_priv->state != RDMA_CM_CONNECT)
2275                goto out;
2276
2277        switch (iw_event->event) {
2278        case IW_CM_EVENT_CLOSE:
2279                event.event = RDMA_CM_EVENT_DISCONNECTED;
2280                break;
2281        case IW_CM_EVENT_CONNECT_REPLY:
2282                memcpy(cma_src_addr(id_priv), laddr,
2283                       rdma_addr_size(laddr));
2284                memcpy(cma_dst_addr(id_priv), raddr,
2285                       rdma_addr_size(raddr));
2286                switch (iw_event->status) {
2287                case 0:
2288                        event.event = RDMA_CM_EVENT_ESTABLISHED;
2289                        event.param.conn.initiator_depth = iw_event->ird;
2290                        event.param.conn.responder_resources = iw_event->ord;
2291                        break;
2292                case -ECONNRESET:
2293                case -ECONNREFUSED:
2294                        event.event = RDMA_CM_EVENT_REJECTED;
2295                        break;
2296                case -ETIMEDOUT:
2297                        event.event = RDMA_CM_EVENT_UNREACHABLE;
2298                        break;
2299                default:
2300                        event.event = RDMA_CM_EVENT_CONNECT_ERROR;
2301                        break;
2302                }
2303                break;
2304        case IW_CM_EVENT_ESTABLISHED:
2305                event.event = RDMA_CM_EVENT_ESTABLISHED;
2306                event.param.conn.initiator_depth = iw_event->ird;
2307                event.param.conn.responder_resources = iw_event->ord;
2308                break;
2309        default:
2310                goto out;
2311        }
2312
2313        event.status = iw_event->status;
2314        event.param.conn.private_data = iw_event->private_data;
2315        event.param.conn.private_data_len = iw_event->private_data_len;
2316        ret = id_priv->id.event_handler(&id_priv->id, &event);
2317        if (ret) {
2318                /* Destroy the CM ID by returning a non-zero value. */
2319                id_priv->cm_id.iw = NULL;
2320                cma_exch(id_priv, RDMA_CM_DESTROYING);
2321                mutex_unlock(&id_priv->handler_mutex);
2322                rdma_destroy_id(&id_priv->id);
2323                return ret;
2324        }
2325
2326out:
2327        mutex_unlock(&id_priv->handler_mutex);
2328        return ret;
2329}
2330
2331static int iw_conn_req_handler(struct iw_cm_id *cm_id,
2332                               struct iw_cm_event *iw_event)
2333{
2334        struct rdma_cm_id *new_cm_id;
2335        struct rdma_id_private *listen_id, *conn_id;
2336        struct rdma_cm_event event = {};
2337        int ret = -ECONNABORTED;
2338        struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2339        struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
2340
2341        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
2342        event.param.conn.private_data = iw_event->private_data;
2343        event.param.conn.private_data_len = iw_event->private_data_len;
2344        event.param.conn.initiator_depth = iw_event->ird;
2345        event.param.conn.responder_resources = iw_event->ord;
2346
2347        listen_id = cm_id->context;
2348
2349        mutex_lock(&listen_id->handler_mutex);
2350        if (listen_id->state != RDMA_CM_LISTEN)
2351                goto out;
2352
2353        /* Create a new RDMA id for the new IW CM ID */
2354        new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
2355                                     listen_id->id.event_handler,
2356                                     listen_id->id.context,
2357                                     RDMA_PS_TCP, IB_QPT_RC,
2358                                     listen_id->res.kern_name);
2359        if (IS_ERR(new_cm_id)) {
2360                ret = -ENOMEM;
2361                goto out;
2362        }
2363        conn_id = container_of(new_cm_id, struct rdma_id_private, id);
2364        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2365        conn_id->state = RDMA_CM_CONNECT;
2366
2367        ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
2368        if (ret) {
2369                mutex_unlock(&conn_id->handler_mutex);
2370                rdma_destroy_id(new_cm_id);
2371                goto out;
2372        }
2373
2374        ret = cma_iw_acquire_dev(conn_id, listen_id);
2375        if (ret) {
2376                mutex_unlock(&conn_id->handler_mutex);
2377                rdma_destroy_id(new_cm_id);
2378                goto out;
2379        }
2380
2381        conn_id->cm_id.iw = cm_id;
2382        cm_id->context = conn_id;
2383        cm_id->cm_handler = cma_iw_handler;
2384
2385        memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
2386        memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
2387
2388        /*
2389         * Protect against the user destroying conn_id from another thread
2390         * until we're done accessing it.
2391         */
2392        atomic_inc(&conn_id->refcount);
2393        ret = conn_id->id.event_handler(&conn_id->id, &event);
2394        if (ret) {
2395                /* User wants to destroy the CM ID */
2396                conn_id->cm_id.iw = NULL;
2397                cma_exch(conn_id, RDMA_CM_DESTROYING);
2398                mutex_unlock(&conn_id->handler_mutex);
2399                mutex_unlock(&listen_id->handler_mutex);
2400                cma_deref_id(conn_id);
2401                rdma_destroy_id(&conn_id->id);
2402                return ret;
2403        }
2404
2405        mutex_unlock(&conn_id->handler_mutex);
2406        cma_deref_id(conn_id);
2407
2408out:
2409        mutex_unlock(&listen_id->handler_mutex);
2410        return ret;
2411}
2412
2413static int cma_ib_listen(struct rdma_id_private *id_priv)
2414{
2415        struct sockaddr *addr;
2416        struct ib_cm_id *id;
2417        __be64 svc_id;
2418
2419        addr = cma_src_addr(id_priv);
2420        svc_id = rdma_get_service_id(&id_priv->id, addr);
2421        id = ib_cm_insert_listen(id_priv->id.device,
2422                                 cma_ib_req_handler, svc_id);
2423        if (IS_ERR(id))
2424                return PTR_ERR(id);
2425        id_priv->cm_id.ib = id;
2426
2427        return 0;
2428}
2429
2430static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
2431{
2432        int ret;
2433        struct iw_cm_id *id;
2434
2435        id = iw_create_cm_id(id_priv->id.device,
2436                             iw_conn_req_handler,
2437                             id_priv);
2438        if (IS_ERR(id))
2439                return PTR_ERR(id);
2440
2441        id->tos = id_priv->tos;
2442        id->tos_set = id_priv->tos_set;
2443        id_priv->cm_id.iw = id;
2444
2445        memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
2446               rdma_addr_size(cma_src_addr(id_priv)));
2447
2448        ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
2449
2450        if (ret) {
2451                iw_destroy_cm_id(id_priv->cm_id.iw);
2452                id_priv->cm_id.iw = NULL;
2453        }
2454
2455        return ret;
2456}
2457
2458static int cma_listen_handler(struct rdma_cm_id *id,
2459                              struct rdma_cm_event *event)
2460{
2461        struct rdma_id_private *id_priv = id->context;
2462
2463        id->context = id_priv->id.context;
2464        id->event_handler = id_priv->id.event_handler;
2465        return id_priv->id.event_handler(id, event);
2466}
2467
2468static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2469                              struct cma_device *cma_dev)
2470{
2471        struct rdma_id_private *dev_id_priv;
2472        struct rdma_cm_id *id;
2473        struct net *net = id_priv->id.route.addr.dev_addr.net;
2474        int ret;
2475
2476        if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
2477                return;
2478
2479        id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
2480                              id_priv->id.qp_type, id_priv->res.kern_name);
2481        if (IS_ERR(id))
2482                return;
2483
2484        dev_id_priv = container_of(id, struct rdma_id_private, id);
2485
2486        dev_id_priv->state = RDMA_CM_ADDR_BOUND;
2487        memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
2488               rdma_addr_size(cma_src_addr(id_priv)));
2489
2490        _cma_attach_to_dev(dev_id_priv, cma_dev);
2491        list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
2492        atomic_inc(&id_priv->refcount);
2493        dev_id_priv->internal_id = 1;
2494        dev_id_priv->afonly = id_priv->afonly;
2495        dev_id_priv->tos_set = id_priv->tos_set;
2496        dev_id_priv->tos = id_priv->tos;
2497
2498        ret = rdma_listen(id, id_priv->backlog);
2499        if (ret)
2500                dev_warn(&cma_dev->device->dev,
2501                         "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
2502}
2503
2504static void cma_listen_on_all(struct rdma_id_private *id_priv)
2505{
2506        struct cma_device *cma_dev;
2507
2508        mutex_lock(&lock);
2509        list_add_tail(&id_priv->list, &listen_any_list);
2510        list_for_each_entry(cma_dev, &dev_list, list)
2511                cma_listen_on_dev(id_priv, cma_dev);
2512        mutex_unlock(&lock);
2513}
2514
2515void rdma_set_service_type(struct rdma_cm_id *id, int tos)
2516{
2517        struct rdma_id_private *id_priv;
2518
2519        id_priv = container_of(id, struct rdma_id_private, id);
2520        id_priv->tos = (u8) tos;
2521        id_priv->tos_set = true;
2522}
2523EXPORT_SYMBOL(rdma_set_service_type);
2524
2525/**
2526 * rdma_set_ack_timeout() - Set the ack timeout of QP associated
2527 *                          with a connection identifier.
2528 * @id: Communication identifier to associated with service type.
2529 * @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec.
2530 *
2531 * This function should be called before rdma_connect() on active side,
2532 * and on passive side before rdma_accept(). It is applicable to primary
2533 * path only. The timeout will affect the local side of the QP, it is not
2534 * negotiated with remote side and zero disables the timer.
2535 *
2536 * Return: 0 for success
2537 */
2538int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
2539{
2540        struct rdma_id_private *id_priv;
2541
2542        if (id->qp_type != IB_QPT_RC)
2543                return -EINVAL;
2544
2545        id_priv = container_of(id, struct rdma_id_private, id);
2546        id_priv->timeout = timeout;
2547        id_priv->timeout_set = true;
2548
2549        return 0;
2550}
2551EXPORT_SYMBOL(rdma_set_ack_timeout);
2552
2553static void cma_query_handler(int status, struct sa_path_rec *path_rec,
2554                              void *context)
2555{
2556        struct cma_work *work = context;
2557        struct rdma_route *route;
2558
2559        route = &work->id->id.route;
2560
2561        if (!status) {
2562                route->num_paths = 1;
2563                *route->path_rec = *path_rec;
2564        } else {
2565                work->old_state = RDMA_CM_ROUTE_QUERY;
2566                work->new_state = RDMA_CM_ADDR_RESOLVED;
2567                work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
2568                work->event.status = status;
2569                pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
2570                                     status);
2571        }
2572
2573        queue_work(cma_wq, &work->work);
2574}
2575
2576static int cma_query_ib_route(struct rdma_id_private *id_priv,
2577                              unsigned long timeout_ms, struct cma_work *work)
2578{
2579        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2580        struct sa_path_rec path_rec;
2581        ib_sa_comp_mask comp_mask;
2582        struct sockaddr_in6 *sin6;
2583        struct sockaddr_ib *sib;
2584
2585        memset(&path_rec, 0, sizeof path_rec);
2586
2587        if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num))
2588                path_rec.rec_type = SA_PATH_REC_TYPE_OPA;
2589        else
2590                path_rec.rec_type = SA_PATH_REC_TYPE_IB;
2591        rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
2592        rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
2593        path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2594        path_rec.numb_path = 1;
2595        path_rec.reversible = 1;
2596        path_rec.service_id = rdma_get_service_id(&id_priv->id,
2597                                                  cma_dst_addr(id_priv));
2598
2599        comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
2600                    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
2601                    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
2602
2603        switch (cma_family(id_priv)) {
2604        case AF_INET:
2605                path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
2606                comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
2607                break;
2608        case AF_INET6:
2609                sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
2610                path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
2611                comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
2612                break;
2613        case AF_IB:
2614                sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
2615                path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
2616                comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
2617                break;
2618        }
2619
2620        id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
2621                                               id_priv->id.port_num, &path_rec,
2622                                               comp_mask, timeout_ms,
2623                                               GFP_KERNEL, cma_query_handler,
2624                                               work, &id_priv->query);
2625
2626        return (id_priv->query_id < 0) ? id_priv->query_id : 0;
2627}
2628
2629static void cma_work_handler(struct work_struct *_work)
2630{
2631        struct cma_work *work = container_of(_work, struct cma_work, work);
2632        struct rdma_id_private *id_priv = work->id;
2633        int destroy = 0;
2634
2635        mutex_lock(&id_priv->handler_mutex);
2636        if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
2637                goto out;
2638
2639        if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2640                cma_exch(id_priv, RDMA_CM_DESTROYING);
2641                destroy = 1;
2642        }
2643out:
2644        mutex_unlock(&id_priv->handler_mutex);
2645        cma_deref_id(id_priv);
2646        if (destroy)
2647                rdma_destroy_id(&id_priv->id);
2648        kfree(work);
2649}
2650
2651static void cma_ndev_work_handler(struct work_struct *_work)
2652{
2653        struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
2654        struct rdma_id_private *id_priv = work->id;
2655        int destroy = 0;
2656
2657        mutex_lock(&id_priv->handler_mutex);
2658        if (id_priv->state == RDMA_CM_DESTROYING ||
2659            id_priv->state == RDMA_CM_DEVICE_REMOVAL)
2660                goto out;
2661
2662        if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
2663                cma_exch(id_priv, RDMA_CM_DESTROYING);
2664                destroy = 1;
2665        }
2666
2667out:
2668        mutex_unlock(&id_priv->handler_mutex);
2669        cma_deref_id(id_priv);
2670        if (destroy)
2671                rdma_destroy_id(&id_priv->id);
2672        kfree(work);
2673}
2674
2675static void cma_init_resolve_route_work(struct cma_work *work,
2676                                        struct rdma_id_private *id_priv)
2677{
2678        work->id = id_priv;
2679        INIT_WORK(&work->work, cma_work_handler);
2680        work->old_state = RDMA_CM_ROUTE_QUERY;
2681        work->new_state = RDMA_CM_ROUTE_RESOLVED;
2682        work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2683}
2684
2685static void cma_init_resolve_addr_work(struct cma_work *work,
2686                                       struct rdma_id_private *id_priv)
2687{
2688        work->id = id_priv;
2689        INIT_WORK(&work->work, cma_work_handler);
2690        work->old_state = RDMA_CM_ADDR_QUERY;
2691        work->new_state = RDMA_CM_ADDR_RESOLVED;
2692        work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2693}
2694
2695static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
2696                                unsigned long timeout_ms)
2697{
2698        struct rdma_route *route = &id_priv->id.route;
2699        struct cma_work *work;
2700        int ret;
2701
2702        work = kzalloc(sizeof *work, GFP_KERNEL);
2703        if (!work)
2704                return -ENOMEM;
2705
2706        cma_init_resolve_route_work(work, id_priv);
2707
2708        route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
2709        if (!route->path_rec) {
2710                ret = -ENOMEM;
2711                goto err1;
2712        }
2713
2714        ret = cma_query_ib_route(id_priv, timeout_ms, work);
2715        if (ret)
2716                goto err2;
2717
2718        return 0;
2719err2:
2720        kfree(route->path_rec);
2721        route->path_rec = NULL;
2722err1:
2723        kfree(work);
2724        return ret;
2725}
2726
2727static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
2728                                           unsigned long supported_gids,
2729                                           enum ib_gid_type default_gid)
2730{
2731        if ((network_type == RDMA_NETWORK_IPV4 ||
2732             network_type == RDMA_NETWORK_IPV6) &&
2733            test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
2734                return IB_GID_TYPE_ROCE_UDP_ENCAP;
2735
2736        return default_gid;
2737}
2738
2739/*
2740 * cma_iboe_set_path_rec_l2_fields() is helper function which sets
2741 * path record type based on GID type.
2742 * It also sets up other L2 fields which includes destination mac address
2743 * netdev ifindex, of the path record.
2744 * It returns the netdev of the bound interface for this path record entry.
2745 */
2746static struct net_device *
2747cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
2748{
2749        struct rdma_route *route = &id_priv->id.route;
2750        enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
2751        struct rdma_addr *addr = &route->addr;
2752        unsigned long supported_gids;
2753        struct net_device *ndev;
2754
2755        if (!addr->dev_addr.bound_dev_if)
2756                return NULL;
2757
2758        ndev = dev_get_by_index(addr->dev_addr.net,
2759                                addr->dev_addr.bound_dev_if);
2760        if (!ndev)
2761                return NULL;
2762
2763        supported_gids = roce_gid_type_mask_support(id_priv->id.device,
2764                                                    id_priv->id.port_num);
2765        gid_type = cma_route_gid_type(addr->dev_addr.network,
2766                                      supported_gids,
2767                                      id_priv->gid_type);
2768        /* Use the hint from IP Stack to select GID Type */
2769        if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
2770                gid_type = ib_network_to_gid_type(addr->dev_addr.network);
2771        route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
2772
2773        route->path_rec->roce.route_resolved = true;
2774        sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
2775        return ndev;
2776}
2777
2778int rdma_set_ib_path(struct rdma_cm_id *id,
2779                     struct sa_path_rec *path_rec)
2780{
2781        struct rdma_id_private *id_priv;
2782        struct net_device *ndev;
2783        int ret;
2784
2785        id_priv = container_of(id, struct rdma_id_private, id);
2786        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
2787                           RDMA_CM_ROUTE_RESOLVED))
2788                return -EINVAL;
2789
2790        id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec),
2791                                     GFP_KERNEL);
2792        if (!id->route.path_rec) {
2793                ret = -ENOMEM;
2794                goto err;
2795        }
2796
2797        if (rdma_protocol_roce(id->device, id->port_num)) {
2798                ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
2799                if (!ndev) {
2800                        ret = -ENODEV;
2801                        goto err_free;
2802                }
2803                dev_put(ndev);
2804        }
2805
2806        id->route.num_paths = 1;
2807        return 0;
2808
2809err_free:
2810        kfree(id->route.path_rec);
2811        id->route.path_rec = NULL;
2812err:
2813        cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
2814        return ret;
2815}
2816EXPORT_SYMBOL(rdma_set_ib_path);
2817
2818static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
2819{
2820        struct cma_work *work;
2821
2822        work = kzalloc(sizeof *work, GFP_KERNEL);
2823        if (!work)
2824                return -ENOMEM;
2825
2826        cma_init_resolve_route_work(work, id_priv);
2827        queue_work(cma_wq, &work->work);
2828        return 0;
2829}
2830
2831static int iboe_tos_to_sl(struct net_device *ndev, int tos)
2832{
2833        int prio;
2834        struct net_device *dev;
2835
2836        prio = rt_tos2priority(tos);
2837        dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
2838        if (dev->num_tc)
2839                return netdev_get_prio_tc_map(dev, prio);
2840
2841#if IS_ENABLED(CONFIG_VLAN_8021Q)
2842        if (is_vlan_dev(ndev))
2843                return (vlan_dev_get_egress_qos_mask(ndev, prio) &
2844                        VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
2845#endif
2846        return 0;
2847}
2848
2849static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2850{
2851        struct rdma_route *route = &id_priv->id.route;
2852        struct rdma_addr *addr = &route->addr;
2853        struct cma_work *work;
2854        int ret;
2855        struct net_device *ndev;
2856
2857        u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
2858                                        rdma_start_port(id_priv->cma_dev->device)];
2859        u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
2860
2861
2862        work = kzalloc(sizeof *work, GFP_KERNEL);
2863        if (!work)
2864                return -ENOMEM;
2865
2866        route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
2867        if (!route->path_rec) {
2868                ret = -ENOMEM;
2869                goto err1;
2870        }
2871
2872        route->num_paths = 1;
2873
2874        ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
2875        if (!ndev) {
2876                ret = -ENODEV;
2877                goto err2;
2878        }
2879
2880        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
2881                    &route->path_rec->sgid);
2882        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
2883                    &route->path_rec->dgid);
2884
2885        if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
2886                /* TODO: get the hoplimit from the inet/inet6 device */
2887                route->path_rec->hop_limit = addr->dev_addr.hoplimit;
2888        else
2889                route->path_rec->hop_limit = 1;
2890        route->path_rec->reversible = 1;
2891        route->path_rec->pkey = cpu_to_be16(0xffff);
2892        route->path_rec->mtu_selector = IB_SA_EQ;
2893        route->path_rec->sl = iboe_tos_to_sl(ndev, tos);
2894        route->path_rec->traffic_class = tos;
2895        route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
2896        route->path_rec->rate_selector = IB_SA_EQ;
2897        route->path_rec->rate = iboe_get_rate(ndev);
2898        dev_put(ndev);
2899        route->path_rec->packet_life_time_selector = IB_SA_EQ;
2900        route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
2901        if (!route->path_rec->mtu) {
2902                ret = -EINVAL;
2903                goto err2;
2904        }
2905
2906        cma_init_resolve_route_work(work, id_priv);
2907        queue_work(cma_wq, &work->work);
2908
2909        return 0;
2910
2911err2:
2912        kfree(route->path_rec);
2913        route->path_rec = NULL;
2914err1:
2915        kfree(work);
2916        return ret;
2917}
2918
2919int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
2920{
2921        struct rdma_id_private *id_priv;
2922        int ret;
2923
2924        id_priv = container_of(id, struct rdma_id_private, id);
2925        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
2926                return -EINVAL;
2927
2928        atomic_inc(&id_priv->refcount);
2929        if (rdma_cap_ib_sa(id->device, id->port_num))
2930                ret = cma_resolve_ib_route(id_priv, timeout_ms);
2931        else if (rdma_protocol_roce(id->device, id->port_num))
2932                ret = cma_resolve_iboe_route(id_priv);
2933        else if (rdma_protocol_iwarp(id->device, id->port_num))
2934                ret = cma_resolve_iw_route(id_priv);
2935        else
2936                ret = -ENOSYS;
2937
2938        if (ret)
2939                goto err;
2940
2941        return 0;
2942err:
2943        cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
2944        cma_deref_id(id_priv);
2945        return ret;
2946}
2947EXPORT_SYMBOL(rdma_resolve_route);
2948
2949static void cma_set_loopback(struct sockaddr *addr)
2950{
2951        switch (addr->sa_family) {
2952        case AF_INET:
2953                ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
2954                break;
2955        case AF_INET6:
2956                ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
2957                              0, 0, 0, htonl(1));
2958                break;
2959        default:
2960                ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
2961                            0, 0, 0, htonl(1));
2962                break;
2963        }
2964}
2965
2966static int cma_bind_loopback(struct rdma_id_private *id_priv)
2967{
2968        struct cma_device *cma_dev, *cur_dev;
2969        union ib_gid gid;
2970        enum ib_port_state port_state;
2971        u16 pkey;
2972        int ret;
2973        u8 p;
2974
2975        cma_dev = NULL;
2976        mutex_lock(&lock);
2977        list_for_each_entry(cur_dev, &dev_list, list) {
2978                if (cma_family(id_priv) == AF_IB &&
2979                    !rdma_cap_ib_cm(cur_dev->device, 1))
2980                        continue;
2981
2982                if (!cma_dev)
2983                        cma_dev = cur_dev;
2984
2985                for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
2986                        if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
2987                            port_state == IB_PORT_ACTIVE) {
2988                                cma_dev = cur_dev;
2989                                goto port_found;
2990                        }
2991                }
2992        }
2993
2994        if (!cma_dev) {
2995                ret = -ENODEV;
2996                goto out;
2997        }
2998
2999        p = 1;
3000
3001port_found:
3002        ret = rdma_query_gid(cma_dev->device, p, 0, &gid);
3003        if (ret)
3004                goto out;
3005
3006        ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
3007        if (ret)
3008                goto out;
3009
3010        id_priv->id.route.addr.dev_addr.dev_type =
3011                (rdma_protocol_ib(cma_dev->device, p)) ?
3012                ARPHRD_INFINIBAND : ARPHRD_ETHER;
3013
3014        rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
3015        ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
3016        id_priv->id.port_num = p;
3017        cma_attach_to_dev(id_priv, cma_dev);
3018        cma_set_loopback(cma_src_addr(id_priv));
3019out:
3020        mutex_unlock(&lock);
3021        return ret;
3022}
3023
3024static void addr_handler(int status, struct sockaddr *src_addr,
3025                         struct rdma_dev_addr *dev_addr, void *context)
3026{
3027        struct rdma_id_private *id_priv = context;
3028        struct rdma_cm_event event = {};
3029        struct sockaddr *addr;
3030        struct sockaddr_storage old_addr;
3031
3032        mutex_lock(&id_priv->handler_mutex);
3033        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
3034                           RDMA_CM_ADDR_RESOLVED))
3035                goto out;
3036
3037        /*
3038         * Store the previous src address, so that if we fail to acquire
3039         * matching rdma device, old address can be restored back, which helps
3040         * to cancel the cma listen operation correctly.
3041         */
3042        addr = cma_src_addr(id_priv);
3043        memcpy(&old_addr, addr, rdma_addr_size(addr));
3044        memcpy(addr, src_addr, rdma_addr_size(src_addr));
3045        if (!status && !id_priv->cma_dev) {
3046                status = cma_acquire_dev_by_src_ip(id_priv);
3047                if (status)
3048                        pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
3049                                             status);
3050        } else if (status) {
3051                pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
3052        }
3053
3054        if (status) {
3055                memcpy(addr, &old_addr,
3056                       rdma_addr_size((struct sockaddr *)&old_addr));
3057                if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
3058                                   RDMA_CM_ADDR_BOUND))
3059                        goto out;
3060                event.event = RDMA_CM_EVENT_ADDR_ERROR;
3061                event.status = status;
3062        } else
3063                event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
3064
3065        if (id_priv->id.event_handler(&id_priv->id, &event)) {
3066                cma_exch(id_priv, RDMA_CM_DESTROYING);
3067                mutex_unlock(&id_priv->handler_mutex);
3068                rdma_destroy_id(&id_priv->id);
3069                return;
3070        }
3071out:
3072        mutex_unlock(&id_priv->handler_mutex);
3073}
3074
3075static int cma_resolve_loopback(struct rdma_id_private *id_priv)
3076{
3077        struct cma_work *work;
3078        union ib_gid gid;
3079        int ret;
3080
3081        work = kzalloc(sizeof *work, GFP_KERNEL);
3082        if (!work)
3083                return -ENOMEM;
3084
3085        if (!id_priv->cma_dev) {
3086                ret = cma_bind_loopback(id_priv);
3087                if (ret)
3088                        goto err;
3089        }
3090
3091        rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
3092        rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
3093
3094        cma_init_resolve_addr_work(work, id_priv);
3095        queue_work(cma_wq, &work->work);
3096        return 0;
3097err:
3098        kfree(work);
3099        return ret;
3100}
3101
3102static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
3103{
3104        struct cma_work *work;
3105        int ret;
3106
3107        work = kzalloc(sizeof *work, GFP_KERNEL);
3108        if (!work)
3109                return -ENOMEM;
3110
3111        if (!id_priv->cma_dev) {
3112                ret = cma_resolve_ib_dev(id_priv);
3113                if (ret)
3114                        goto err;
3115        }
3116
3117        rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
3118                &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
3119
3120        cma_init_resolve_addr_work(work, id_priv);
3121        queue_work(cma_wq, &work->work);
3122        return 0;
3123err:
3124        kfree(work);
3125        return ret;
3126}
3127
3128static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
3129                         const struct sockaddr *dst_addr)
3130{
3131        if (!src_addr || !src_addr->sa_family) {
3132                src_addr = (struct sockaddr *) &id->route.addr.src_addr;
3133                src_addr->sa_family = dst_addr->sa_family;
3134                if (IS_ENABLED(CONFIG_IPV6) &&
3135                    dst_addr->sa_family == AF_INET6) {
3136                        struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
3137                        struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
3138                        src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
3139                        if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
3140                                id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
3141                } else if (dst_addr->sa_family == AF_IB) {
3142                        ((struct sockaddr_ib *) src_addr)->sib_pkey =
3143                                ((struct sockaddr_ib *) dst_addr)->sib_pkey;
3144                }
3145        }
3146        return rdma_bind_addr(id, src_addr);
3147}
3148
3149int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
3150                      const struct sockaddr *dst_addr, unsigned long timeout_ms)
3151{
3152        struct rdma_id_private *id_priv;
3153        int ret;
3154
3155        id_priv = container_of(id, struct rdma_id_private, id);
3156        if (id_priv->state == RDMA_CM_IDLE) {
3157                ret = cma_bind_addr(id, src_addr, dst_addr);
3158                if (ret)
3159                        return ret;
3160        }
3161
3162        if (cma_family(id_priv) != dst_addr->sa_family)
3163                return -EINVAL;
3164
3165        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
3166                return -EINVAL;
3167
3168        memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
3169        if (cma_any_addr(dst_addr)) {
3170                ret = cma_resolve_loopback(id_priv);
3171        } else {
3172                if (dst_addr->sa_family == AF_IB) {
3173                        ret = cma_resolve_ib_addr(id_priv);
3174                } else {
3175                        ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
3176                                              &id->route.addr.dev_addr,
3177                                              timeout_ms, addr_handler,
3178                                              false, id_priv);
3179                }
3180        }
3181        if (ret)
3182                goto err;
3183
3184        return 0;
3185err:
3186        cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
3187        return ret;
3188}
3189EXPORT_SYMBOL(rdma_resolve_addr);
3190
3191int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
3192{
3193        struct rdma_id_private *id_priv;
3194        unsigned long flags;
3195        int ret;
3196
3197        id_priv = container_of(id, struct rdma_id_private, id);
3198        spin_lock_irqsave(&id_priv->lock, flags);
3199        if (reuse || id_priv->state == RDMA_CM_IDLE) {
3200                id_priv->reuseaddr = reuse;
3201                ret = 0;
3202        } else {
3203                ret = -EINVAL;
3204        }
3205        spin_unlock_irqrestore(&id_priv->lock, flags);
3206        return ret;
3207}
3208EXPORT_SYMBOL(rdma_set_reuseaddr);
3209
3210int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
3211{
3212        struct rdma_id_private *id_priv;
3213        unsigned long flags;
3214        int ret;
3215
3216        id_priv = container_of(id, struct rdma_id_private, id);
3217        spin_lock_irqsave(&id_priv->lock, flags);
3218        if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
3219                id_priv->options |= (1 << CMA_OPTION_AFONLY);
3220                id_priv->afonly = afonly;
3221                ret = 0;
3222        } else {
3223                ret = -EINVAL;
3224        }
3225        spin_unlock_irqrestore(&id_priv->lock, flags);
3226        return ret;
3227}
3228EXPORT_SYMBOL(rdma_set_afonly);
3229
3230static void cma_bind_port(struct rdma_bind_list *bind_list,
3231                          struct rdma_id_private *id_priv)
3232{
3233        struct sockaddr *addr;
3234        struct sockaddr_ib *sib;
3235        u64 sid, mask;
3236        __be16 port;
3237
3238        addr = cma_src_addr(id_priv);
3239        port = htons(bind_list->port);
3240
3241        switch (addr->sa_family) {
3242        case AF_INET:
3243                ((struct sockaddr_in *) addr)->sin_port = port;
3244                break;
3245        case AF_INET6:
3246                ((struct sockaddr_in6 *) addr)->sin6_port = port;
3247                break;
3248        case AF_IB:
3249                sib = (struct sockaddr_ib *) addr;
3250                sid = be64_to_cpu(sib->sib_sid);
3251                mask = be64_to_cpu(sib->sib_sid_mask);
3252                sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
3253                sib->sib_sid_mask = cpu_to_be64(~0ULL);
3254                break;
3255        }
3256        id_priv->bind_list = bind_list;
3257        hlist_add_head(&id_priv->node, &bind_list->owners);
3258}
3259
3260static int cma_alloc_port(enum rdma_ucm_port_space ps,
3261                          struct rdma_id_private *id_priv, unsigned short snum)
3262{
3263        struct rdma_bind_list *bind_list;
3264        int ret;
3265
3266        bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
3267        if (!bind_list)
3268                return -ENOMEM;
3269
3270        ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list,
3271                           snum);
3272        if (ret < 0)
3273                goto err;
3274
3275        bind_list->ps = ps;
3276        bind_list->port = snum;
3277        cma_bind_port(bind_list, id_priv);
3278        return 0;
3279err:
3280        kfree(bind_list);
3281        return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
3282}
3283
3284static int cma_port_is_unique(struct rdma_bind_list *bind_list,
3285                              struct rdma_id_private *id_priv)
3286{
3287        struct rdma_id_private *cur_id;
3288        struct sockaddr  *daddr = cma_dst_addr(id_priv);
3289        struct sockaddr  *saddr = cma_src_addr(id_priv);
3290        __be16 dport = cma_port(daddr);
3291
3292        hlist_for_each_entry(cur_id, &bind_list->owners, node) {
3293                struct sockaddr  *cur_daddr = cma_dst_addr(cur_id);
3294                struct sockaddr  *cur_saddr = cma_src_addr(cur_id);
3295                __be16 cur_dport = cma_port(cur_daddr);
3296
3297                if (id_priv == cur_id)
3298                        continue;
3299
3300                /* different dest port -> unique */
3301                if (!cma_any_port(daddr) &&
3302                    !cma_any_port(cur_daddr) &&
3303                    (dport != cur_dport))
3304                        continue;
3305
3306                /* different src address -> unique */
3307                if (!cma_any_addr(saddr) &&
3308                    !cma_any_addr(cur_saddr) &&
3309                    cma_addr_cmp(saddr, cur_saddr))
3310                        continue;
3311
3312                /* different dst address -> unique */
3313                if (!cma_any_addr(daddr) &&
3314                    !cma_any_addr(cur_daddr) &&
3315                    cma_addr_cmp(daddr, cur_daddr))
3316                        continue;
3317
3318                return -EADDRNOTAVAIL;
3319        }
3320        return 0;
3321}
3322
3323static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
3324                              struct rdma_id_private *id_priv)
3325{
3326        static unsigned int last_used_port;
3327        int low, high, remaining;
3328        unsigned int rover;
3329        struct net *net = id_priv->id.route.addr.dev_addr.net;
3330
3331        inet_get_local_port_range(net, &low, &high);
3332        remaining = (high - low) + 1;
3333        rover = prandom_u32() % remaining + low;
3334retry:
3335        if (last_used_port != rover) {
3336                struct rdma_bind_list *bind_list;
3337                int ret;
3338
3339                bind_list = cma_ps_find(net, ps, (unsigned short)rover);
3340
3341                if (!bind_list) {
3342                        ret = cma_alloc_port(ps, id_priv, rover);
3343                } else {
3344                        ret = cma_port_is_unique(bind_list, id_priv);
3345                        if (!ret)
3346                                cma_bind_port(bind_list, id_priv);
3347                }
3348                /*
3349                 * Remember previously used port number in order to avoid
3350                 * re-using same port immediately after it is closed.
3351                 */
3352                if (!ret)
3353                        last_used_port = rover;
3354                if (ret != -EADDRNOTAVAIL)
3355                        return ret;
3356        }
3357        if (--remaining) {
3358                rover++;
3359                if ((rover < low) || (rover > high))
3360                        rover = low;
3361                goto retry;
3362        }
3363        return -EADDRNOTAVAIL;
3364}
3365
3366/*
3367 * Check that the requested port is available.  This is called when trying to
3368 * bind to a specific port, or when trying to listen on a bound port.  In
3369 * the latter case, the provided id_priv may already be on the bind_list, but
3370 * we still need to check that it's okay to start listening.
3371 */
3372static int cma_check_port(struct rdma_bind_list *bind_list,
3373                          struct rdma_id_private *id_priv, uint8_t reuseaddr)
3374{
3375        struct rdma_id_private *cur_id;
3376        struct sockaddr *addr, *cur_addr;
3377
3378        addr = cma_src_addr(id_priv);
3379        hlist_for_each_entry(cur_id, &bind_list->owners, node) {
3380                if (id_priv == cur_id)
3381                        continue;
3382
3383                if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
3384                    cur_id->reuseaddr)
3385                        continue;
3386
3387                cur_addr = cma_src_addr(cur_id);
3388                if (id_priv->afonly && cur_id->afonly &&
3389                    (addr->sa_family != cur_addr->sa_family))
3390                        continue;
3391
3392                if (cma_any_addr(addr) || cma_any_addr(cur_addr))
3393                        return -EADDRNOTAVAIL;
3394
3395                if (!cma_addr_cmp(addr, cur_addr))
3396                        return -EADDRINUSE;
3397        }
3398        return 0;
3399}
3400
3401static int cma_use_port(enum rdma_ucm_port_space ps,
3402                        struct rdma_id_private *id_priv)
3403{
3404        struct rdma_bind_list *bind_list;
3405        unsigned short snum;
3406        int ret;
3407
3408        snum = ntohs(cma_port(cma_src_addr(id_priv)));
3409        if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
3410                return -EACCES;
3411
3412        bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum);
3413        if (!bind_list) {
3414                ret = cma_alloc_port(ps, id_priv, snum);
3415        } else {
3416                ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr);
3417                if (!ret)
3418                        cma_bind_port(bind_list, id_priv);
3419        }
3420        return ret;
3421}
3422
3423static int cma_bind_listen(struct rdma_id_private *id_priv)
3424{
3425        struct rdma_bind_list *bind_list = id_priv->bind_list;
3426        int ret = 0;
3427
3428        mutex_lock(&lock);
3429        if (bind_list->owners.first->next)
3430                ret = cma_check_port(bind_list, id_priv, 0);
3431        mutex_unlock(&lock);
3432        return ret;
3433}
3434
3435static enum rdma_ucm_port_space
3436cma_select_inet_ps(struct rdma_id_private *id_priv)
3437{
3438        switch (id_priv->id.ps) {
3439        case RDMA_PS_TCP:
3440        case RDMA_PS_UDP:
3441        case RDMA_PS_IPOIB:
3442        case RDMA_PS_IB:
3443                return id_priv->id.ps;
3444        default:
3445
3446                return 0;
3447        }
3448}
3449
3450static enum rdma_ucm_port_space
3451cma_select_ib_ps(struct rdma_id_private *id_priv)
3452{
3453        enum rdma_ucm_port_space ps = 0;
3454        struct sockaddr_ib *sib;
3455        u64 sid_ps, mask, sid;
3456
3457        sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
3458        mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
3459        sid = be64_to_cpu(sib->sib_sid) & mask;
3460
3461        if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
3462                sid_ps = RDMA_IB_IP_PS_IB;
3463                ps = RDMA_PS_IB;
3464        } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
3465                   (sid == (RDMA_IB_IP_PS_TCP & mask))) {
3466                sid_ps = RDMA_IB_IP_PS_TCP;
3467                ps = RDMA_PS_TCP;
3468        } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
3469                   (sid == (RDMA_IB_IP_PS_UDP & mask))) {
3470                sid_ps = RDMA_IB_IP_PS_UDP;
3471                ps = RDMA_PS_UDP;
3472        }
3473
3474        if (ps) {
3475                sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
3476                sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
3477                                                be64_to_cpu(sib->sib_sid_mask));
3478        }
3479        return ps;
3480}
3481
3482static int cma_get_port(struct rdma_id_private *id_priv)
3483{
3484        enum rdma_ucm_port_space ps;
3485        int ret;
3486
3487        if (cma_family(id_priv) != AF_IB)
3488                ps = cma_select_inet_ps(id_priv);
3489        else
3490                ps = cma_select_ib_ps(id_priv);
3491        if (!ps)
3492                return -EPROTONOSUPPORT;
3493
3494        mutex_lock(&lock);
3495        if (cma_any_port(cma_src_addr(id_priv)))
3496                ret = cma_alloc_any_port(ps, id_priv);
3497        else
3498                ret = cma_use_port(ps, id_priv);
3499        mutex_unlock(&lock);
3500
3501        return ret;
3502}
3503
3504static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
3505                               struct sockaddr *addr)
3506{
3507#if IS_ENABLED(CONFIG_IPV6)
3508        struct sockaddr_in6 *sin6;
3509
3510        if (addr->sa_family != AF_INET6)
3511                return 0;
3512
3513        sin6 = (struct sockaddr_in6 *) addr;
3514
3515        if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
3516                return 0;
3517
3518        if (!sin6->sin6_scope_id)
3519                        return -EINVAL;
3520
3521        dev_addr->bound_dev_if = sin6->sin6_scope_id;
3522#endif
3523        return 0;
3524}
3525
3526int rdma_listen(struct rdma_cm_id *id, int backlog)
3527{
3528        struct rdma_id_private *id_priv;
3529        int ret;
3530
3531        id_priv = container_of(id, struct rdma_id_private, id);
3532        if (id_priv->state == RDMA_CM_IDLE) {
3533                id->route.addr.src_addr.ss_family = AF_INET;
3534                ret = rdma_bind_addr(id, cma_src_addr(id_priv));
3535                if (ret)
3536                        return ret;
3537        }
3538
3539        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
3540                return -EINVAL;
3541
3542        if (id_priv->reuseaddr) {
3543                ret = cma_bind_listen(id_priv);
3544                if (ret)
3545                        goto err;
3546        }
3547
3548        id_priv->backlog = backlog;
3549        if (id->device) {
3550                if (rdma_cap_ib_cm(id->device, 1)) {
3551                        ret = cma_ib_listen(id_priv);
3552                        if (ret)
3553                                goto err;
3554                } else if (rdma_cap_iw_cm(id->device, 1)) {
3555                        ret = cma_iw_listen(id_priv, backlog);
3556                        if (ret)
3557                                goto err;
3558                } else {
3559                        ret = -ENOSYS;
3560                        goto err;
3561                }
3562        } else
3563                cma_listen_on_all(id_priv);
3564
3565        return 0;
3566err:
3567        id_priv->backlog = 0;
3568        cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
3569        return ret;
3570}
3571EXPORT_SYMBOL(rdma_listen);
3572
3573int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
3574{
3575        struct rdma_id_private *id_priv;
3576        int ret;
3577        struct sockaddr  *daddr;
3578
3579        if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
3580            addr->sa_family != AF_IB)
3581                return -EAFNOSUPPORT;
3582
3583        id_priv = container_of(id, struct rdma_id_private, id);
3584        if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
3585                return -EINVAL;
3586
3587        ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
3588        if (ret)
3589                goto err1;
3590
3591        memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
3592        if (!cma_any_addr(addr)) {
3593                ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
3594                if (ret)
3595                        goto err1;
3596
3597                ret = cma_acquire_dev_by_src_ip(id_priv);
3598                if (ret)
3599                        goto err1;
3600        }
3601
3602        if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
3603                if (addr->sa_family == AF_INET)
3604                        id_priv->afonly = 1;
3605#if IS_ENABLED(CONFIG_IPV6)
3606                else if (addr->sa_family == AF_INET6) {
3607                        struct net *net = id_priv->id.route.addr.dev_addr.net;
3608
3609                        id_priv->afonly = net->ipv6.sysctl.bindv6only;
3610                }
3611#endif
3612        }
3613        daddr = cma_dst_addr(id_priv);
3614        daddr->sa_family = addr->sa_family;
3615
3616        ret = cma_get_port(id_priv);
3617        if (ret)
3618                goto err2;
3619
3620        return 0;
3621err2:
3622        rdma_restrack_del(&id_priv->res);
3623        if (id_priv->cma_dev)
3624                cma_release_dev(id_priv);
3625err1:
3626        cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
3627        return ret;
3628}
3629EXPORT_SYMBOL(rdma_bind_addr);
3630
3631static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
3632{
3633        struct cma_hdr *cma_hdr;
3634
3635        cma_hdr = hdr;
3636        cma_hdr->cma_version = CMA_VERSION;
3637        if (cma_family(id_priv) == AF_INET) {
3638                struct sockaddr_in *src4, *dst4;
3639
3640                src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
3641                dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
3642
3643                cma_set_ip_ver(cma_hdr, 4);
3644                cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
3645                cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
3646                cma_hdr->port = src4->sin_port;
3647        } else if (cma_family(id_priv) == AF_INET6) {
3648                struct sockaddr_in6 *src6, *dst6;
3649
3650                src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
3651                dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
3652
3653                cma_set_ip_ver(cma_hdr, 6);
3654                cma_hdr->src_addr.ip6 = src6->sin6_addr;
3655                cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
3656                cma_hdr->port = src6->sin6_port;
3657        }
3658        return 0;
3659}
3660
3661static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
3662                                const struct ib_cm_event *ib_event)
3663{
3664        struct rdma_id_private *id_priv = cm_id->context;
3665        struct rdma_cm_event event = {};
3666        const struct ib_cm_sidr_rep_event_param *rep =
3667                                &ib_event->param.sidr_rep_rcvd;
3668        int ret = 0;
3669
3670        mutex_lock(&id_priv->handler_mutex);
3671        if (id_priv->state != RDMA_CM_CONNECT)
3672                goto out;
3673
3674        switch (ib_event->event) {
3675        case IB_CM_SIDR_REQ_ERROR:
3676                event.event = RDMA_CM_EVENT_UNREACHABLE;
3677                event.status = -ETIMEDOUT;
3678                break;
3679        case IB_CM_SIDR_REP_RECEIVED:
3680                event.param.ud.private_data = ib_event->private_data;
3681                event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
3682                if (rep->status != IB_SIDR_SUCCESS) {
3683                        event.event = RDMA_CM_EVENT_UNREACHABLE;
3684                        event.status = ib_event->param.sidr_rep_rcvd.status;
3685                        pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n",
3686                                             event.status);
3687                        break;
3688                }
3689                ret = cma_set_qkey(id_priv, rep->qkey);
3690                if (ret) {
3691                        pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret);
3692                        event.event = RDMA_CM_EVENT_ADDR_ERROR;
3693                        event.status = ret;
3694                        break;
3695                }
3696                ib_init_ah_attr_from_path(id_priv->id.device,
3697                                          id_priv->id.port_num,
3698                                          id_priv->id.route.path_rec,
3699                                          &event.param.ud.ah_attr,
3700                                          rep->sgid_attr);
3701                event.param.ud.qp_num = rep->qpn;
3702                event.param.ud.qkey = rep->qkey;
3703                event.event = RDMA_CM_EVENT_ESTABLISHED;
3704                event.status = 0;
3705                break;
3706        default:
3707                pr_err("RDMA CMA: unexpected IB CM event: %d\n",
3708                       ib_event->event);
3709                goto out;
3710        }
3711
3712        ret = id_priv->id.event_handler(&id_priv->id, &event);
3713
3714        rdma_destroy_ah_attr(&event.param.ud.ah_attr);
3715        if (ret) {
3716                /* Destroy the CM ID by returning a non-zero value. */
3717                id_priv->cm_id.ib = NULL;
3718                cma_exch(id_priv, RDMA_CM_DESTROYING);
3719                mutex_unlock(&id_priv->handler_mutex);
3720                rdma_destroy_id(&id_priv->id);
3721                return ret;
3722        }
3723out:
3724        mutex_unlock(&id_priv->handler_mutex);
3725        return ret;
3726}
3727
3728static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
3729                              struct rdma_conn_param *conn_param)
3730{
3731        struct ib_cm_sidr_req_param req;
3732        struct ib_cm_id *id;
3733        void *private_data;
3734        u8 offset;
3735        int ret;
3736
3737        memset(&req, 0, sizeof req);
3738        offset = cma_user_data_offset(id_priv);
3739        req.private_data_len = offset + conn_param->private_data_len;
3740        if (req.private_data_len < conn_param->private_data_len)
3741                return -EINVAL;
3742
3743        if (req.private_data_len) {
3744                private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
3745                if (!private_data)
3746                        return -ENOMEM;
3747        } else {
3748                private_data = NULL;
3749        }
3750
3751        if (conn_param->private_data && conn_param->private_data_len)
3752                memcpy(private_data + offset, conn_param->private_data,
3753                       conn_param->private_data_len);
3754
3755        if (private_data) {
3756                ret = cma_format_hdr(private_data, id_priv);
3757                if (ret)
3758                        goto out;
3759                req.private_data = private_data;
3760        }
3761
3762        id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
3763                             id_priv);
3764        if (IS_ERR(id)) {
3765                ret = PTR_ERR(id);
3766                goto out;
3767        }
3768        id_priv->cm_id.ib = id;
3769
3770        req.path = id_priv->id.route.path_rec;
3771        req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
3772        req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3773        req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
3774        req.max_cm_retries = CMA_MAX_CM_RETRIES;
3775
3776        ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
3777        if (ret) {
3778                ib_destroy_cm_id(id_priv->cm_id.ib);
3779                id_priv->cm_id.ib = NULL;
3780        }
3781out:
3782        kfree(private_data);
3783        return ret;
3784}
3785
3786static int cma_connect_ib(struct rdma_id_private *id_priv,
3787                          struct rdma_conn_param *conn_param)
3788{
3789        struct ib_cm_req_param req;
3790        struct rdma_route *route;
3791        void *private_data;
3792        struct ib_cm_id *id;
3793        u8 offset;
3794        int ret;
3795
3796        memset(&req, 0, sizeof req);
3797        offset = cma_user_data_offset(id_priv);
3798        req.private_data_len = offset + conn_param->private_data_len;
3799        if (req.private_data_len < conn_param->private_data_len)
3800                return -EINVAL;
3801
3802        if (req.private_data_len) {
3803                private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
3804                if (!private_data)
3805                        return -ENOMEM;
3806        } else {
3807                private_data = NULL;
3808        }
3809
3810        if (conn_param->private_data && conn_param->private_data_len)
3811                memcpy(private_data + offset, conn_param->private_data,
3812                       conn_param->private_data_len);
3813
3814        id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv);
3815        if (IS_ERR(id)) {
3816                ret = PTR_ERR(id);
3817                goto out;
3818        }
3819        id_priv->cm_id.ib = id;
3820
3821        route = &id_priv->id.route;
3822        if (private_data) {
3823                ret = cma_format_hdr(private_data, id_priv);
3824                if (ret)
3825                        goto out;
3826                req.private_data = private_data;
3827        }
3828
3829        req.primary_path = &route->path_rec[0];
3830        if (route->num_paths == 2)
3831                req.alternate_path = &route->path_rec[1];
3832
3833        req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
3834        /* Alternate path SGID attribute currently unsupported */
3835        req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3836        req.qp_num = id_priv->qp_num;
3837        req.qp_type = id_priv->id.qp_type;
3838        req.starting_psn = id_priv->seq_num;
3839        req.responder_resources = conn_param->responder_resources;
3840        req.initiator_depth = conn_param->initiator_depth;
3841        req.flow_control = conn_param->flow_control;
3842        req.retry_count = min_t(u8, 7, conn_param->retry_count);
3843        req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
3844        req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
3845        req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
3846        req.max_cm_retries = CMA_MAX_CM_RETRIES;
3847        req.srq = id_priv->srq ? 1 : 0;
3848
3849        ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
3850out:
3851        if (ret && !IS_ERR(id)) {
3852                ib_destroy_cm_id(id);
3853                id_priv->cm_id.ib = NULL;
3854        }
3855
3856        kfree(private_data);
3857        return ret;
3858}
3859
3860static int cma_connect_iw(struct rdma_id_private *id_priv,
3861                          struct rdma_conn_param *conn_param)
3862{
3863        struct iw_cm_id *cm_id;
3864        int ret;
3865        struct iw_cm_conn_param iw_param;
3866
3867        cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
3868        if (IS_ERR(cm_id))
3869                return PTR_ERR(cm_id);
3870
3871        cm_id->tos = id_priv->tos;
3872        cm_id->tos_set = id_priv->tos_set;
3873        id_priv->cm_id.iw = cm_id;
3874
3875        memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
3876               rdma_addr_size(cma_src_addr(id_priv)));
3877        memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv),
3878               rdma_addr_size(cma_dst_addr(id_priv)));
3879
3880        ret = cma_modify_qp_rtr(id_priv, conn_param);
3881        if (ret)
3882                goto out;
3883
3884        if (conn_param) {
3885                iw_param.ord = conn_param->initiator_depth;
3886                iw_param.ird = conn_param->responder_resources;
3887                iw_param.private_data = conn_param->private_data;
3888                iw_param.private_data_len = conn_param->private_data_len;
3889                iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
3890        } else {
3891                memset(&iw_param, 0, sizeof iw_param);
3892                iw_param.qpn = id_priv->qp_num;
3893        }
3894        ret = iw_cm_connect(cm_id, &iw_param);
3895out:
3896        if (ret) {
3897                iw_destroy_cm_id(cm_id);
3898                id_priv->cm_id.iw = NULL;
3899        }
3900        return ret;
3901}
3902
3903int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
3904{
3905        struct rdma_id_private *id_priv;
3906        int ret;
3907
3908        id_priv = container_of(id, struct rdma_id_private, id);
3909        if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
3910                return -EINVAL;
3911
3912        if (!id->qp) {
3913                id_priv->qp_num = conn_param->qp_num;
3914                id_priv->srq = conn_param->srq;
3915        }
3916
3917        if (rdma_cap_ib_cm(id->device, id->port_num)) {
3918                if (id->qp_type == IB_QPT_UD)
3919                        ret = cma_resolve_ib_udp(id_priv, conn_param);
3920                else
3921                        ret = cma_connect_ib(id_priv, conn_param);
3922        } else if (rdma_cap_iw_cm(id->device, id->port_num))
3923                ret = cma_connect_iw(id_priv, conn_param);
3924        else
3925                ret = -ENOSYS;
3926        if (ret)
3927                goto err;
3928
3929        return 0;
3930err:
3931        cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
3932        return ret;
3933}
3934EXPORT_SYMBOL(rdma_connect);
3935
3936static int cma_accept_ib(struct rdma_id_private *id_priv,
3937                         struct rdma_conn_param *conn_param)
3938{
3939        struct ib_cm_rep_param rep;
3940        int ret;
3941
3942        ret = cma_modify_qp_rtr(id_priv, conn_param);
3943        if (ret)
3944                goto out;
3945
3946        ret = cma_modify_qp_rts(id_priv, conn_param);
3947        if (ret)
3948                goto out;
3949
3950        memset(&rep, 0, sizeof rep);
3951        rep.qp_num = id_priv->qp_num;
3952        rep.starting_psn = id_priv->seq_num;
3953        rep.private_data = conn_param->private_data;
3954        rep.private_data_len = conn_param->private_data_len;
3955        rep.responder_resources = conn_param->responder_resources;
3956        rep.initiator_depth = conn_param->initiator_depth;
3957        rep.failover_accepted = 0;
3958        rep.flow_control = conn_param->flow_control;
3959        rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
3960        rep.srq = id_priv->srq ? 1 : 0;
3961
3962        ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
3963out:
3964        return ret;
3965}
3966
3967static int cma_accept_iw(struct rdma_id_private *id_priv,
3968                  struct rdma_conn_param *conn_param)
3969{
3970        struct iw_cm_conn_param iw_param;
3971        int ret;
3972
3973        if (!conn_param)
3974                return -EINVAL;
3975
3976        ret = cma_modify_qp_rtr(id_priv, conn_param);
3977        if (ret)
3978                return ret;
3979
3980        iw_param.ord = conn_param->initiator_depth;
3981        iw_param.ird = conn_param->responder_resources;
3982        iw_param.private_data = conn_param->private_data;
3983        iw_param.private_data_len = conn_param->private_data_len;
3984        if (id_priv->id.qp) {
3985                iw_param.qpn = id_priv->qp_num;
3986        } else
3987                iw_param.qpn = conn_param->qp_num;
3988
3989        return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
3990}
3991
3992static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
3993                             enum ib_cm_sidr_status status, u32 qkey,
3994                             const void *private_data, int private_data_len)
3995{
3996        struct ib_cm_sidr_rep_param rep;
3997        int ret;
3998
3999        memset(&rep, 0, sizeof rep);
4000        rep.status = status;
4001        if (status == IB_SIDR_SUCCESS) {
4002                ret = cma_set_qkey(id_priv, qkey);
4003                if (ret)
4004                        return ret;
4005                rep.qp_num = id_priv->qp_num;
4006                rep.qkey = id_priv->qkey;
4007        }
4008        rep.private_data = private_data;
4009        rep.private_data_len = private_data_len;
4010
4011        return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
4012}
4013
4014int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
4015                  const char *caller)
4016{
4017        struct rdma_id_private *id_priv;
4018        int ret;
4019
4020        id_priv = container_of(id, struct rdma_id_private, id);
4021
4022        rdma_restrack_set_task(&id_priv->res, caller);
4023
4024        if (!cma_comp(id_priv, RDMA_CM_CONNECT))
4025                return -EINVAL;
4026
4027        if (!id->qp && conn_param) {
4028                id_priv->qp_num = conn_param->qp_num;
4029                id_priv->srq = conn_param->srq;
4030        }
4031
4032        if (rdma_cap_ib_cm(id->device, id->port_num)) {
4033                if (id->qp_type == IB_QPT_UD) {
4034                        if (conn_param)
4035                                ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
4036                                                        conn_param->qkey,
4037                                                        conn_param->private_data,
4038                                                        conn_param->private_data_len);
4039                        else
4040                                ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
4041                                                        0, NULL, 0);
4042                } else {
4043                        if (conn_param)
4044                                ret = cma_accept_ib(id_priv, conn_param);
4045                        else
4046                                ret = cma_rep_recv(id_priv);
4047                }
4048        } else if (rdma_cap_iw_cm(id->device, id->port_num))
4049                ret = cma_accept_iw(id_priv, conn_param);
4050        else
4051                ret = -ENOSYS;
4052
4053        if (ret)
4054                goto reject;
4055
4056        return 0;
4057reject:
4058        cma_modify_qp_err(id_priv);
4059        rdma_reject(id, NULL, 0);
4060        return ret;
4061}
4062EXPORT_SYMBOL(__rdma_accept);
4063
4064int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
4065{
4066        struct rdma_id_private *id_priv;
4067        int ret;
4068
4069        id_priv = container_of(id, struct rdma_id_private, id);
4070        if (!id_priv->cm_id.ib)
4071                return -EINVAL;
4072
4073        switch (id->device->node_type) {
4074        case RDMA_NODE_IB_CA:
4075                ret = ib_cm_notify(id_priv->cm_id.ib, event);
4076                break;
4077        default:
4078                ret = 0;
4079                break;
4080        }
4081        return ret;
4082}
4083EXPORT_SYMBOL(rdma_notify);
4084
4085int rdma_reject(struct rdma_cm_id *id, const void *private_data,
4086                u8 private_data_len)
4087{
4088        struct rdma_id_private *id_priv;
4089        int ret;
4090
4091        id_priv = container_of(id, struct rdma_id_private, id);
4092        if (!id_priv->cm_id.ib)
4093                return -EINVAL;
4094
4095        if (rdma_cap_ib_cm(id->device, id->port_num)) {
4096                if (id->qp_type == IB_QPT_UD)
4097                        ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
4098                                                private_data, private_data_len);
4099                else
4100                        ret = ib_send_cm_rej(id_priv->cm_id.ib,
4101                                             IB_CM_REJ_CONSUMER_DEFINED, NULL,
4102                                             0, private_data, private_data_len);
4103        } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
4104                ret = iw_cm_reject(id_priv->cm_id.iw,
4105                                   private_data, private_data_len);
4106        } else
4107                ret = -ENOSYS;
4108
4109        return ret;
4110}
4111EXPORT_SYMBOL(rdma_reject);
4112
4113int rdma_disconnect(struct rdma_cm_id *id)
4114{
4115        struct rdma_id_private *id_priv;
4116        int ret;
4117
4118        id_priv = container_of(id, struct rdma_id_private, id);
4119        if (!id_priv->cm_id.ib)
4120                return -EINVAL;
4121
4122        if (rdma_cap_ib_cm(id->device, id->port_num)) {
4123                ret = cma_modify_qp_err(id_priv);
4124                if (ret)
4125                        goto out;
4126                /* Initiate or respond to a disconnect. */
4127                if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
4128                        ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
4129        } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
4130                ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
4131        } else
4132                ret = -EINVAL;
4133
4134out:
4135        return ret;
4136}
4137EXPORT_SYMBOL(rdma_disconnect);
4138
4139static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
4140{
4141        struct rdma_id_private *id_priv;
4142        struct cma_multicast *mc = multicast->context;
4143        struct rdma_cm_event event = {};
4144        int ret = 0;
4145
4146        id_priv = mc->id_priv;
4147        mutex_lock(&id_priv->handler_mutex);
4148        if (id_priv->state != RDMA_CM_ADDR_BOUND &&
4149            id_priv->state != RDMA_CM_ADDR_RESOLVED)
4150                goto out;
4151
4152        if (!status)
4153                status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
4154        else
4155                pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
4156                                     status);
4157        mutex_lock(&id_priv->qp_mutex);
4158        if (!status && id_priv->id.qp) {
4159                status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
4160                                         be16_to_cpu(multicast->rec.mlid));
4161                if (status)
4162                        pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
4163                                             status);
4164        }
4165        mutex_unlock(&id_priv->qp_mutex);
4166
4167        event.status = status;
4168        event.param.ud.private_data = mc->context;
4169        if (!status) {
4170                struct rdma_dev_addr *dev_addr =
4171                        &id_priv->id.route.addr.dev_addr;
4172                struct net_device *ndev =
4173                        dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4174                enum ib_gid_type gid_type =
4175                        id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
4176                        rdma_start_port(id_priv->cma_dev->device)];
4177
4178                event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
4179                ret = ib_init_ah_from_mcmember(id_priv->id.device,
4180                                               id_priv->id.port_num,
4181                                               &multicast->rec,
4182                                               ndev, gid_type,
4183                                               &event.param.ud.ah_attr);
4184                if (ret)
4185                        event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
4186
4187                event.param.ud.qp_num = 0xFFFFFF;
4188                event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
4189                if (ndev)
4190                        dev_put(ndev);
4191        } else
4192                event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
4193
4194        ret = id_priv->id.event_handler(&id_priv->id, &event);
4195
4196        rdma_destroy_ah_attr(&event.param.ud.ah_attr);
4197        if (ret) {
4198                cma_exch(id_priv, RDMA_CM_DESTROYING);
4199                mutex_unlock(&id_priv->handler_mutex);
4200                rdma_destroy_id(&id_priv->id);
4201                return 0;
4202        }
4203
4204out:
4205        mutex_unlock(&id_priv->handler_mutex);
4206        return 0;
4207}
4208
4209static void cma_set_mgid(struct rdma_id_private *id_priv,
4210                         struct sockaddr *addr, union ib_gid *mgid)
4211{
4212        unsigned char mc_map[MAX_ADDR_LEN];
4213        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
4214        struct sockaddr_in *sin = (struct sockaddr_in *) addr;
4215        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
4216
4217        if (cma_any_addr(addr)) {
4218                memset(mgid, 0, sizeof *mgid);
4219        } else if ((addr->sa_family == AF_INET6) &&
4220                   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
4221                                                                 0xFF10A01B)) {
4222                /* IPv6 address is an SA assigned MGID. */
4223                memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
4224        } else if (addr->sa_family == AF_IB) {
4225                memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
4226        } else if (addr->sa_family == AF_INET6) {
4227                ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
4228                if (id_priv->id.ps == RDMA_PS_UDP)
4229                        mc_map[7] = 0x01;       /* Use RDMA CM signature */
4230                *mgid = *(union ib_gid *) (mc_map + 4);
4231        } else {
4232                ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
4233                if (id_priv->id.ps == RDMA_PS_UDP)
4234                        mc_map[7] = 0x01;       /* Use RDMA CM signature */
4235                *mgid = *(union ib_gid *) (mc_map + 4);
4236        }
4237}
4238
4239static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
4240                                 struct cma_multicast *mc)
4241{
4242        struct ib_sa_mcmember_rec rec;
4243        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
4244        ib_sa_comp_mask comp_mask;
4245        int ret;
4246
4247        ib_addr_get_mgid(dev_addr, &rec.mgid);
4248        ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
4249                                     &rec.mgid, &rec);
4250        if (ret)
4251                return ret;
4252
4253        ret = cma_set_qkey(id_priv, 0);
4254        if (ret)
4255                return ret;
4256
4257        cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
4258        rec.qkey = cpu_to_be32(id_priv->qkey);
4259        rdma_addr_get_sgid(dev_addr, &rec.port_gid);
4260        rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
4261        rec.join_state = mc->join_state;
4262
4263        if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
4264            (!ib_sa_sendonly_fullmem_support(&sa_client,
4265                                             id_priv->id.device,
4266                                             id_priv->id.port_num))) {
4267                dev_warn(
4268                        &id_priv->id.device->dev,
4269                        "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
4270                        id_priv->id.port_num);
4271                return -EOPNOTSUPP;
4272        }
4273
4274        comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
4275                    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
4276                    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
4277                    IB_SA_MCMEMBER_REC_FLOW_LABEL |
4278                    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
4279
4280        if (id_priv->id.ps == RDMA_PS_IPOIB)
4281                comp_mask |= IB_SA_MCMEMBER_REC_RATE |
4282                             IB_SA_MCMEMBER_REC_RATE_SELECTOR |
4283                             IB_SA_MCMEMBER_REC_MTU_SELECTOR |
4284                             IB_SA_MCMEMBER_REC_MTU |
4285                             IB_SA_MCMEMBER_REC_HOP_LIMIT;
4286
4287        mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
4288                                                id_priv->id.port_num, &rec,
4289                                                comp_mask, GFP_KERNEL,
4290                                                cma_ib_mc_handler, mc);
4291        return PTR_ERR_OR_ZERO(mc->multicast.ib);
4292}
4293
4294static void iboe_mcast_work_handler(struct work_struct *work)
4295{
4296        struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
4297        struct cma_multicast *mc = mw->mc;
4298        struct ib_sa_multicast *m = mc->multicast.ib;
4299
4300        mc->multicast.ib->context = mc;
4301        cma_ib_mc_handler(0, m);
4302        kref_put(&mc->mcref, release_mc);
4303        kfree(mw);
4304}
4305
4306static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
4307                              enum ib_gid_type gid_type)
4308{
4309        struct sockaddr_in *sin = (struct sockaddr_in *)addr;
4310        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
4311
4312        if (cma_any_addr(addr)) {
4313                memset(mgid, 0, sizeof *mgid);
4314        } else if (addr->sa_family == AF_INET6) {
4315                memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
4316        } else {
4317                mgid->raw[0] =
4318                        (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
4319                mgid->raw[1] =
4320                        (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
4321                mgid->raw[2] = 0;
4322                mgid->raw[3] = 0;
4323                mgid->raw[4] = 0;
4324                mgid->raw[5] = 0;
4325                mgid->raw[6] = 0;
4326                mgid->raw[7] = 0;
4327                mgid->raw[8] = 0;
4328                mgid->raw[9] = 0;
4329                mgid->raw[10] = 0xff;
4330                mgid->raw[11] = 0xff;
4331                *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
4332        }
4333}
4334
4335static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
4336                                   struct cma_multicast *mc)
4337{
4338        struct iboe_mcast_work *work;
4339        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
4340        int err = 0;
4341        struct sockaddr *addr = (struct sockaddr *)&mc->addr;
4342        struct net_device *ndev = NULL;
4343        enum ib_gid_type gid_type;
4344        bool send_only;
4345
4346        send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
4347
4348        if (cma_zero_addr((struct sockaddr *)&mc->addr))
4349                return -EINVAL;
4350
4351        work = kzalloc(sizeof *work, GFP_KERNEL);
4352        if (!work)
4353                return -ENOMEM;
4354
4355        mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
4356        if (!mc->multicast.ib) {
4357                err = -ENOMEM;
4358                goto out1;
4359        }
4360
4361        gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
4362                   rdma_start_port(id_priv->cma_dev->device)];
4363        cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
4364
4365        mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
4366        if (id_priv->id.ps == RDMA_PS_UDP)
4367                mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
4368
4369        if (dev_addr->bound_dev_if)
4370                ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4371        if (!ndev) {
4372                err = -ENODEV;
4373                goto out2;
4374        }
4375        mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
4376        mc->multicast.ib->rec.hop_limit = 1;
4377        mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
4378
4379        if (addr->sa_family == AF_INET) {
4380                if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
4381                        mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
4382                        if (!send_only) {
4383                                err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
4384                                                    true);
4385                        }
4386                }
4387        } else {
4388                if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
4389                        err = -ENOTSUPP;
4390        }
4391        dev_put(ndev);
4392        if (err || !mc->multicast.ib->rec.mtu) {
4393                if (!err)
4394                        err = -EINVAL;
4395                goto out2;
4396        }
4397        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
4398                    &mc->multicast.ib->rec.port_gid);
4399        work->id = id_priv;
4400        work->mc = mc;
4401        INIT_WORK(&work->work, iboe_mcast_work_handler);
4402        kref_get(&mc->mcref);
4403        queue_work(cma_wq, &work->work);
4404
4405        return 0;
4406
4407out2:
4408        kfree(mc->multicast.ib);
4409out1:
4410        kfree(work);
4411        return err;
4412}
4413
4414int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
4415                        u8 join_state, void *context)
4416{
4417        struct rdma_id_private *id_priv;
4418        struct cma_multicast *mc;
4419        int ret;
4420
4421        if (!id->device)
4422                return -EINVAL;
4423
4424        id_priv = container_of(id, struct rdma_id_private, id);
4425        if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
4426            !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
4427                return -EINVAL;
4428
4429        mc = kmalloc(sizeof *mc, GFP_KERNEL);
4430        if (!mc)
4431                return -ENOMEM;
4432
4433        memcpy(&mc->addr, addr, rdma_addr_size(addr));
4434        mc->context = context;
4435        mc->id_priv = id_priv;
4436        mc->join_state = join_state;
4437
4438        if (rdma_protocol_roce(id->device, id->port_num)) {
4439                kref_init(&mc->mcref);
4440                ret = cma_iboe_join_multicast(id_priv, mc);
4441                if (ret)
4442                        goto out_err;
4443        } else if (rdma_cap_ib_mcast(id->device, id->port_num)) {
4444                ret = cma_join_ib_multicast(id_priv, mc);
4445                if (ret)
4446                        goto out_err;
4447        } else {
4448                ret = -ENOSYS;
4449                goto out_err;
4450        }
4451
4452        spin_lock(&id_priv->lock);
4453        list_add(&mc->list, &id_priv->mc_list);
4454        spin_unlock(&id_priv->lock);
4455
4456        return 0;
4457out_err:
4458        kfree(mc);
4459        return ret;
4460}
4461EXPORT_SYMBOL(rdma_join_multicast);
4462
4463void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
4464{
4465        struct rdma_id_private *id_priv;
4466        struct cma_multicast *mc;
4467
4468        id_priv = container_of(id, struct rdma_id_private, id);
4469        spin_lock_irq(&id_priv->lock);
4470        list_for_each_entry(mc, &id_priv->mc_list, list) {
4471                if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
4472                        list_del(&mc->list);
4473                        spin_unlock_irq(&id_priv->lock);
4474
4475                        if (id->qp)
4476                                ib_detach_mcast(id->qp,
4477                                                &mc->multicast.ib->rec.mgid,
4478                                                be16_to_cpu(mc->multicast.ib->rec.mlid));
4479
4480                        BUG_ON(id_priv->cma_dev->device != id->device);
4481
4482                        if (rdma_cap_ib_mcast(id->device, id->port_num)) {
4483                                ib_sa_free_multicast(mc->multicast.ib);
4484                                kfree(mc);
4485                        } else if (rdma_protocol_roce(id->device, id->port_num)) {
4486                                cma_leave_roce_mc_group(id_priv, mc);
4487                        }
4488                        return;
4489                }
4490        }
4491        spin_unlock_irq(&id_priv->lock);
4492}
4493EXPORT_SYMBOL(rdma_leave_multicast);
4494
4495static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
4496{
4497        struct rdma_dev_addr *dev_addr;
4498        struct cma_ndev_work *work;
4499
4500        dev_addr = &id_priv->id.route.addr.dev_addr;
4501
4502        if ((dev_addr->bound_dev_if == ndev->ifindex) &&
4503            (net_eq(dev_net(ndev), dev_addr->net)) &&
4504            memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
4505                pr_info("RDMA CM addr change for ndev %s used by id %p\n",
4506                        ndev->name, &id_priv->id);
4507                work = kzalloc(sizeof *work, GFP_KERNEL);
4508                if (!work)
4509                        return -ENOMEM;
4510
4511                INIT_WORK(&work->work, cma_ndev_work_handler);
4512                work->id = id_priv;
4513                work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
4514                atomic_inc(&id_priv->refcount);
4515                queue_work(cma_wq, &work->work);
4516        }
4517
4518        return 0;
4519}
4520
4521static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
4522                               void *ptr)
4523{
4524        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
4525        struct cma_device *cma_dev;
4526        struct rdma_id_private *id_priv;
4527        int ret = NOTIFY_DONE;
4528
4529        if (event != NETDEV_BONDING_FAILOVER)
4530                return NOTIFY_DONE;
4531
4532        if (!netif_is_bond_master(ndev))
4533                return NOTIFY_DONE;
4534
4535        mutex_lock(&lock);
4536        list_for_each_entry(cma_dev, &dev_list, list)
4537                list_for_each_entry(id_priv, &cma_dev->id_list, list) {
4538                        ret = cma_netdev_change(ndev, id_priv);
4539                        if (ret)
4540                                goto out;
4541                }
4542
4543out:
4544        mutex_unlock(&lock);
4545        return ret;
4546}
4547
4548static struct notifier_block cma_nb = {
4549        .notifier_call = cma_netdev_callback
4550};
4551
4552static void cma_add_one(struct ib_device *device)
4553{
4554        struct cma_device *cma_dev;
4555        struct rdma_id_private *id_priv;
4556        unsigned int i;
4557        unsigned long supported_gids = 0;
4558
4559        cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
4560        if (!cma_dev)
4561                return;
4562
4563        cma_dev->device = device;
4564        cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
4565                                            sizeof(*cma_dev->default_gid_type),
4566                                            GFP_KERNEL);
4567        if (!cma_dev->default_gid_type)
4568                goto free_cma_dev;
4569
4570        cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
4571                                            sizeof(*cma_dev->default_roce_tos),
4572                                            GFP_KERNEL);
4573        if (!cma_dev->default_roce_tos)
4574                goto free_gid_type;
4575
4576        rdma_for_each_port (device, i) {
4577                supported_gids = roce_gid_type_mask_support(device, i);
4578                WARN_ON(!supported_gids);
4579                if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
4580                        cma_dev->default_gid_type[i - rdma_start_port(device)] =
4581                                CMA_PREFERRED_ROCE_GID_TYPE;
4582                else
4583                        cma_dev->default_gid_type[i - rdma_start_port(device)] =
4584                                find_first_bit(&supported_gids, BITS_PER_LONG);
4585                cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
4586        }
4587
4588        init_completion(&cma_dev->comp);
4589        atomic_set(&cma_dev->refcount, 1);
4590        INIT_LIST_HEAD(&cma_dev->id_list);
4591        ib_set_client_data(device, &cma_client, cma_dev);
4592
4593        mutex_lock(&lock);
4594        list_add_tail(&cma_dev->list, &dev_list);
4595        list_for_each_entry(id_priv, &listen_any_list, list)
4596                cma_listen_on_dev(id_priv, cma_dev);
4597        mutex_unlock(&lock);
4598
4599        return;
4600
4601free_gid_type:
4602        kfree(cma_dev->default_gid_type);
4603
4604free_cma_dev:
4605        kfree(cma_dev);
4606
4607        return;
4608}
4609
4610static int cma_remove_id_dev(struct rdma_id_private *id_priv)
4611{
4612        struct rdma_cm_event event = {};
4613        enum rdma_cm_state state;
4614        int ret = 0;
4615
4616        /* Record that we want to remove the device */
4617        state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
4618        if (state == RDMA_CM_DESTROYING)
4619                return 0;
4620
4621        cma_cancel_operation(id_priv, state);
4622        mutex_lock(&id_priv->handler_mutex);
4623
4624        /* Check for destruction from another callback. */
4625        if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
4626                goto out;
4627
4628        event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
4629        ret = id_priv->id.event_handler(&id_priv->id, &event);
4630out:
4631        mutex_unlock(&id_priv->handler_mutex);
4632        return ret;
4633}
4634
4635static void cma_process_remove(struct cma_device *cma_dev)
4636{
4637        struct rdma_id_private *id_priv;
4638        int ret;
4639
4640        mutex_lock(&lock);
4641        while (!list_empty(&cma_dev->id_list)) {
4642                id_priv = list_entry(cma_dev->id_list.next,
4643                                     struct rdma_id_private, list);
4644
4645                list_del(&id_priv->listen_list);
4646                list_del_init(&id_priv->list);
4647                atomic_inc(&id_priv->refcount);
4648                mutex_unlock(&lock);
4649
4650                ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
4651                cma_deref_id(id_priv);
4652                if (ret)
4653                        rdma_destroy_id(&id_priv->id);
4654
4655                mutex_lock(&lock);
4656        }
4657        mutex_unlock(&lock);
4658
4659        cma_deref_dev(cma_dev);
4660        wait_for_completion(&cma_dev->comp);
4661}
4662
4663static void cma_remove_one(struct ib_device *device, void *client_data)
4664{
4665        struct cma_device *cma_dev = client_data;
4666
4667        if (!cma_dev)
4668                return;
4669
4670        mutex_lock(&lock);
4671        list_del(&cma_dev->list);
4672        mutex_unlock(&lock);
4673
4674        cma_process_remove(cma_dev);
4675        kfree(cma_dev->default_roce_tos);
4676        kfree(cma_dev->default_gid_type);
4677        kfree(cma_dev);
4678}
4679
4680static int cma_init_net(struct net *net)
4681{
4682        struct cma_pernet *pernet = cma_pernet(net);
4683
4684        xa_init(&pernet->tcp_ps);
4685        xa_init(&pernet->udp_ps);
4686        xa_init(&pernet->ipoib_ps);
4687        xa_init(&pernet->ib_ps);
4688
4689        return 0;
4690}
4691
4692static void cma_exit_net(struct net *net)
4693{
4694        struct cma_pernet *pernet = cma_pernet(net);
4695
4696        WARN_ON(!xa_empty(&pernet->tcp_ps));
4697        WARN_ON(!xa_empty(&pernet->udp_ps));
4698        WARN_ON(!xa_empty(&pernet->ipoib_ps));
4699        WARN_ON(!xa_empty(&pernet->ib_ps));
4700}
4701
4702static struct pernet_operations cma_pernet_operations = {
4703        .init = cma_init_net,
4704        .exit = cma_exit_net,
4705        .id = &cma_pernet_id,
4706        .size = sizeof(struct cma_pernet),
4707};
4708
4709static int __init cma_init(void)
4710{
4711        int ret;
4712
4713        cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
4714        if (!cma_wq)
4715                return -ENOMEM;
4716
4717        ret = register_pernet_subsys(&cma_pernet_operations);
4718        if (ret)
4719                goto err_wq;
4720
4721        ib_sa_register_client(&sa_client);
4722        register_netdevice_notifier(&cma_nb);
4723
4724        ret = ib_register_client(&cma_client);
4725        if (ret)
4726                goto err;
4727
4728        ret = cma_configfs_init();
4729        if (ret)
4730                goto err_ib;
4731
4732        return 0;
4733
4734err_ib:
4735        ib_unregister_client(&cma_client);
4736err:
4737        unregister_netdevice_notifier(&cma_nb);
4738        ib_sa_unregister_client(&sa_client);
4739err_wq:
4740        destroy_workqueue(cma_wq);
4741        return ret;
4742}
4743
4744static void __exit cma_cleanup(void)
4745{
4746        cma_configfs_exit();
4747        ib_unregister_client(&cma_client);
4748        unregister_netdevice_notifier(&cma_nb);
4749        ib_sa_unregister_client(&sa_client);
4750        unregister_pernet_subsys(&cma_pernet_operations);
4751        destroy_workqueue(cma_wq);
4752}
4753
4754module_init(cma_init);
4755module_exit(cma_cleanup);
4756