linux/drivers/infiniband/core/addr.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
   3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
   4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
   5 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
   6 *
   7 * This software is available to you under a choice of one of two
   8 * licenses.  You may choose to be licensed under the terms of the GNU
   9 * General Public License (GPL) Version 2, available from the file
  10 * COPYING in the main directory of this source tree, or the
  11 * OpenIB.org BSD license below:
  12 *
  13 *     Redistribution and use in source and binary forms, with or
  14 *     without modification, are permitted provided that the following
  15 *     conditions are met:
  16 *
  17 *      - Redistributions of source code must retain the above
  18 *        copyright notice, this list of conditions and the following
  19 *        disclaimer.
  20 *
  21 *      - Redistributions in binary form must reproduce the above
  22 *        copyright notice, this list of conditions and the following
  23 *        disclaimer in the documentation and/or other materials
  24 *        provided with the distribution.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33 * SOFTWARE.
  34 */
  35
  36#include <linux/mutex.h>
  37#include <linux/inetdevice.h>
  38#include <linux/slab.h>
  39#include <linux/workqueue.h>
  40#include <linux/module.h>
  41#include <net/arp.h>
  42#include <net/neighbour.h>
  43#include <net/route.h>
  44#include <net/netevent.h>
  45#include <net/addrconf.h>
  46#include <net/ip6_route.h>
  47#include <rdma/ib_addr.h>
  48#include <rdma/ib.h>
  49#include <rdma/rdma_netlink.h>
  50#include <net/netlink.h>
  51
  52#include "core_priv.h"
  53
/*
 * One outstanding address-resolution request.  Allocated by
 * rdma_resolve_ip(), linked on req_list by queue_req(), and freed by
 * process_one_req()/process_req() after the callback has run.
 */
struct addr_req {
        /* Entry on req_list; queue_req() inserts in timeout order. */
        struct list_head list;
        /* Private copies of the caller's source and destination addresses. */
        struct sockaddr_storage src_addr;
        struct sockaddr_storage dst_addr;
        /* Caller-owned result structure filled in by addr_resolve(). */
        struct rdma_dev_addr *addr;
        /* Client whose refcount is held while the request is pending. */
        struct rdma_addr_client *client;
        /* Opaque cookie handed back to the callback. */
        void *context;
        /* Completion callback; receives the final status. */
        void (*callback)(int status, struct sockaddr *src_addr,
                         struct rdma_dev_addr *addr, void *context);
        /* Deadline in jiffies after which the request is timed out. */
        unsigned long timeout;
        /* Per-request work item; its handler is process_one_req(). */
        struct delayed_work work;
        /* Current result; -ENODATA means resolution is still in progress. */
        int status;
        /* Netlink sequence number matched against the response's nlmsg_seq. */
        u32 seq;
};
  68
/* Source of the per-request sequence numbers assigned in rdma_resolve_ip(). */
static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);

static void process_req(struct work_struct *work);

/* Taken around every walk or modification of req_list in this file. */
static DEFINE_MUTEX(lock);
/* All pending requests, linked through addr_req.list. */
static LIST_HEAD(req_list);
/* Global work item that rescans req_list; kicked by netevent_callback(). */
static DECLARE_DELAYED_WORK(work, process_req);
/* Ordered workqueue created in addr_init() and used by set_timeout(). */
static struct workqueue_struct *addr_wq;
  77
/*
 * Netlink attribute policy used by ib_nl_is_good_ip_resp(): the DGID
 * attribute is binary data bounded by sizeof(struct rdma_nla_ls_gid).
 */
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
        [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
                .len = sizeof(struct rdma_nla_ls_gid)},
};
  82
  83static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
  84{
  85        struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
  86        int ret;
  87
  88        if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
  89                return false;
  90
  91        ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
  92                        nlmsg_len(nlh), ib_nl_addr_policy, NULL);
  93        if (ret)
  94                return false;
  95
  96        return true;
  97}
  98
  99static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
 100{
 101        const struct nlattr *head, *curr;
 102        union ib_gid gid;
 103        struct addr_req *req;
 104        int len, rem;
 105        int found = 0;
 106
 107        head = (const struct nlattr *)nlmsg_data(nlh);
 108        len = nlmsg_len(nlh);
 109
 110        nla_for_each_attr(curr, head, len, rem) {
 111                if (curr->nla_type == LS_NLA_TYPE_DGID)
 112                        memcpy(&gid, nla_data(curr), nla_len(curr));
 113        }
 114
 115        mutex_lock(&lock);
 116        list_for_each_entry(req, &req_list, list) {
 117                if (nlh->nlmsg_seq != req->seq)
 118                        continue;
 119                /* We set the DGID part, the rest was set earlier */
 120                rdma_addr_set_dgid(req->addr, &gid);
 121                req->status = 0;
 122                found = 1;
 123                break;
 124        }
 125        mutex_unlock(&lock);
 126
 127        if (!found)
 128                pr_info("Couldn't find request waiting for DGID: %pI6\n",
 129                        &gid);
 130}
 131
 132int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
 133                             struct netlink_callback *cb)
 134{
 135        const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
 136
 137        if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
 138            !(NETLINK_CB(skb).sk) ||
 139            !netlink_capable(skb, CAP_NET_ADMIN))
 140                return -EPERM;
 141
 142        if (ib_nl_is_good_ip_resp(nlh))
 143                ib_nl_process_good_ip_rsep(nlh);
 144
 145        return skb->len;
 146}
 147
 148static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
 149                             const void *daddr,
 150                             u32 seq, u16 family)
 151{
 152        struct sk_buff *skb = NULL;
 153        struct nlmsghdr *nlh;
 154        struct rdma_ls_ip_resolve_header *header;
 155        void *data;
 156        size_t size;
 157        int attrtype;
 158        int len;
 159
 160        if (family == AF_INET) {
 161                size = sizeof(struct in_addr);
 162                attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
 163        } else {
 164                size = sizeof(struct in6_addr);
 165                attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
 166        }
 167
 168        len = nla_total_size(sizeof(size));
 169        len += NLMSG_ALIGN(sizeof(*header));
 170
 171        skb = nlmsg_new(len, GFP_KERNEL);
 172        if (!skb)
 173                return -ENOMEM;
 174
 175        data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
 176                            RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
 177        if (!data) {
 178                nlmsg_free(skb);
 179                return -ENODATA;
 180        }
 181
 182        /* Construct the family header first */
 183        header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
 184        header->ifindex = dev_addr->bound_dev_if;
 185        nla_put(skb, attrtype, size, daddr);
 186
 187        /* Repair the nlmsg header length */
 188        nlmsg_end(skb, nlh);
 189        ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
 190
 191        /* Make the request retry, so when we get the response from userspace
 192         * we will have something.
 193         */
 194        return -ENODATA;
 195}
 196
 197int rdma_addr_size(struct sockaddr *addr)
 198{
 199        switch (addr->sa_family) {
 200        case AF_INET:
 201                return sizeof(struct sockaddr_in);
 202        case AF_INET6:
 203                return sizeof(struct sockaddr_in6);
 204        case AF_IB:
 205                return sizeof(struct sockaddr_ib);
 206        default:
 207                return 0;
 208        }
 209}
 210EXPORT_SYMBOL(rdma_addr_size);
 211
/*
 * Client used for this file's own requests; registered in addr_init()
 * and passed to rdma_resolve_ip() by rdma_addr_find_l2_eth_by_grh().
 */
static struct rdma_addr_client self;
 213
 214void rdma_addr_register_client(struct rdma_addr_client *client)
 215{
 216        atomic_set(&client->refcount, 1);
 217        init_completion(&client->comp);
 218}
 219EXPORT_SYMBOL(rdma_addr_register_client);
 220
 221static inline void put_client(struct rdma_addr_client *client)
 222{
 223        if (atomic_dec_and_test(&client->refcount))
 224                complete(&client->comp);
 225}
 226
 227void rdma_addr_unregister_client(struct rdma_addr_client *client)
 228{
 229        put_client(client);
 230        wait_for_completion(&client->comp);
 231}
 232EXPORT_SYMBOL(rdma_addr_unregister_client);
 233
 234int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 235                     const unsigned char *dst_dev_addr)
 236{
 237        dev_addr->dev_type = dev->type;
 238        memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
 239        memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
 240        if (dst_dev_addr)
 241                memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 242        dev_addr->bound_dev_if = dev->ifindex;
 243        return 0;
 244}
 245EXPORT_SYMBOL(rdma_copy_addr);
 246
 247int rdma_translate_ip(const struct sockaddr *addr,
 248                      struct rdma_dev_addr *dev_addr,
 249                      u16 *vlan_id)
 250{
 251        struct net_device *dev;
 252        int ret = -EADDRNOTAVAIL;
 253
 254        if (dev_addr->bound_dev_if) {
 255                dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
 256                if (!dev)
 257                        return -ENODEV;
 258                ret = rdma_copy_addr(dev_addr, dev, NULL);
 259                dev_put(dev);
 260                return ret;
 261        }
 262
 263        switch (addr->sa_family) {
 264        case AF_INET:
 265                dev = ip_dev_find(dev_addr->net,
 266                        ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
 267
 268                if (!dev)
 269                        return ret;
 270
 271                ret = rdma_copy_addr(dev_addr, dev, NULL);
 272                dev_addr->bound_dev_if = dev->ifindex;
 273                if (vlan_id)
 274                        *vlan_id = rdma_vlan_dev_vlan_id(dev);
 275                dev_put(dev);
 276                break;
 277#if IS_ENABLED(CONFIG_IPV6)
 278        case AF_INET6:
 279                rcu_read_lock();
 280                for_each_netdev_rcu(dev_addr->net, dev) {
 281                        if (ipv6_chk_addr(dev_addr->net,
 282                                          &((const struct sockaddr_in6 *)addr)->sin6_addr,
 283                                          dev, 1)) {
 284                                ret = rdma_copy_addr(dev_addr, dev, NULL);
 285                                dev_addr->bound_dev_if = dev->ifindex;
 286                                if (vlan_id)
 287                                        *vlan_id = rdma_vlan_dev_vlan_id(dev);
 288                                break;
 289                        }
 290                }
 291                rcu_read_unlock();
 292                break;
 293#endif
 294        }
 295        return ret;
 296}
 297EXPORT_SYMBOL(rdma_translate_ip);
 298
 299static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
 300{
 301        unsigned long delay;
 302
 303        delay = time - jiffies;
 304        if ((long)delay < 0)
 305                delay = 0;
 306
 307        mod_delayed_work(addr_wq, delayed_work, delay);
 308}
 309
 310static void queue_req(struct addr_req *req)
 311{
 312        struct addr_req *temp_req;
 313
 314        mutex_lock(&lock);
 315        list_for_each_entry_reverse(temp_req, &req_list, list) {
 316                if (time_after_eq(req->timeout, temp_req->timeout))
 317                        break;
 318        }
 319
 320        list_add(&req->list, &temp_req->list);
 321
 322        set_timeout(&req->work, req->timeout);
 323        mutex_unlock(&lock);
 324}
 325
 326static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
 327                          const void *daddr, u32 seq, u16 family)
 328{
 329        if (ibnl_chk_listeners(RDMA_NL_GROUP_LS))
 330                return -EADDRNOTAVAIL;
 331
 332        /* We fill in what we can, the response will fill the rest */
 333        rdma_copy_addr(dev_addr, dst->dev, NULL);
 334        return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
 335}
 336
 337static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
 338                        const void *daddr)
 339{
 340        struct neighbour *n;
 341        int ret;
 342
 343        n = dst_neigh_lookup(dst, daddr);
 344
 345        rcu_read_lock();
 346        if (!n || !(n->nud_state & NUD_VALID)) {
 347                if (n)
 348                        neigh_event_send(n, NULL);
 349                ret = -ENODATA;
 350        } else {
 351                ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
 352        }
 353        rcu_read_unlock();
 354
 355        if (n)
 356                neigh_release(n);
 357
 358        return ret;
 359}
 360
 361static bool has_gateway(struct dst_entry *dst, sa_family_t family)
 362{
 363        struct rtable *rt;
 364        struct rt6_info *rt6;
 365
 366        if (family == AF_INET) {
 367                rt = container_of(dst, struct rtable, dst);
 368                return rt->rt_uses_gateway;
 369        }
 370
 371        rt6 = container_of(dst, struct rt6_info, dst);
 372        return rt6->rt6i_flags & RTF_GATEWAY;
 373}
 374
 375static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
 376                    const struct sockaddr *dst_in, u32 seq)
 377{
 378        const struct sockaddr_in *dst_in4 =
 379                (const struct sockaddr_in *)dst_in;
 380        const struct sockaddr_in6 *dst_in6 =
 381                (const struct sockaddr_in6 *)dst_in;
 382        const void *daddr = (dst_in->sa_family == AF_INET) ?
 383                (const void *)&dst_in4->sin_addr.s_addr :
 384                (const void *)&dst_in6->sin6_addr;
 385        sa_family_t family = dst_in->sa_family;
 386
 387        /* Gateway + ARPHRD_INFINIBAND -> IB router */
 388        if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
 389                return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
 390        else
 391                return dst_fetch_ha(dst, dev_addr, daddr);
 392}
 393
 394static int addr4_resolve(struct sockaddr_in *src_in,
 395                         const struct sockaddr_in *dst_in,
 396                         struct rdma_dev_addr *addr,
 397                         struct rtable **prt)
 398{
 399        __be32 src_ip = src_in->sin_addr.s_addr;
 400        __be32 dst_ip = dst_in->sin_addr.s_addr;
 401        struct rtable *rt;
 402        struct flowi4 fl4;
 403        int ret;
 404
 405        memset(&fl4, 0, sizeof(fl4));
 406        fl4.daddr = dst_ip;
 407        fl4.saddr = src_ip;
 408        fl4.flowi4_oif = addr->bound_dev_if;
 409        rt = ip_route_output_key(addr->net, &fl4);
 410        ret = PTR_ERR_OR_ZERO(rt);
 411        if (ret)
 412                return ret;
 413
 414        src_in->sin_family = AF_INET;
 415        src_in->sin_addr.s_addr = fl4.saddr;
 416
 417        /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
 418         * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
 419         * type accordingly.
 420         */
 421        if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
 422                addr->network = RDMA_NETWORK_IPV4;
 423
 424        addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
 425
 426        *prt = rt;
 427        return 0;
 428}
 429
 430#if IS_ENABLED(CONFIG_IPV6)
 431static int addr6_resolve(struct sockaddr_in6 *src_in,
 432                         const struct sockaddr_in6 *dst_in,
 433                         struct rdma_dev_addr *addr,
 434                         struct dst_entry **pdst)
 435{
 436        struct flowi6 fl6;
 437        struct dst_entry *dst;
 438        struct rt6_info *rt;
 439        int ret;
 440
 441        memset(&fl6, 0, sizeof fl6);
 442        fl6.daddr = dst_in->sin6_addr;
 443        fl6.saddr = src_in->sin6_addr;
 444        fl6.flowi6_oif = addr->bound_dev_if;
 445
 446        ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
 447        if (ret < 0)
 448                return ret;
 449
 450        rt = (struct rt6_info *)dst;
 451        if (ipv6_addr_any(&src_in->sin6_addr)) {
 452                src_in->sin6_family = AF_INET6;
 453                src_in->sin6_addr = fl6.saddr;
 454        }
 455
 456        /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
 457         * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
 458         * type accordingly.
 459         */
 460        if (rt->rt6i_flags & RTF_GATEWAY &&
 461            ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
 462                addr->network = RDMA_NETWORK_IPV6;
 463
 464        addr->hoplimit = ip6_dst_hoplimit(dst);
 465
 466        *pdst = dst;
 467        return 0;
 468}
 469#else
 470static int addr6_resolve(struct sockaddr_in6 *src_in,
 471                         const struct sockaddr_in6 *dst_in,
 472                         struct rdma_dev_addr *addr,
 473                         struct dst_entry **pdst)
 474{
 475        return -EADDRNOTAVAIL;
 476}
 477#endif
 478
 479static int addr_resolve_neigh(struct dst_entry *dst,
 480                              const struct sockaddr *dst_in,
 481                              struct rdma_dev_addr *addr,
 482                              u32 seq)
 483{
 484        if (dst->dev->flags & IFF_LOOPBACK) {
 485                int ret;
 486
 487                ret = rdma_translate_ip(dst_in, addr, NULL);
 488                if (!ret)
 489                        memcpy(addr->dst_dev_addr, addr->src_dev_addr,
 490                               MAX_ADDR_LEN);
 491
 492                return ret;
 493        }
 494
 495        /* If the device doesn't do ARP internally */
 496        if (!(dst->dev->flags & IFF_NOARP))
 497                return fetch_ha(dst, addr, dst_in, seq);
 498
 499        return rdma_copy_addr(addr, dst->dev, NULL);
 500}
 501
 502static int addr_resolve(struct sockaddr *src_in,
 503                        const struct sockaddr *dst_in,
 504                        struct rdma_dev_addr *addr,
 505                        bool resolve_neigh,
 506                        u32 seq)
 507{
 508        struct net_device *ndev;
 509        struct dst_entry *dst;
 510        int ret;
 511
 512        if (!addr->net) {
 513                pr_warn_ratelimited("%s: missing namespace\n", __func__);
 514                return -EINVAL;
 515        }
 516
 517        if (src_in->sa_family == AF_INET) {
 518                struct rtable *rt = NULL;
 519                const struct sockaddr_in *dst_in4 =
 520                        (const struct sockaddr_in *)dst_in;
 521
 522                ret = addr4_resolve((struct sockaddr_in *)src_in,
 523                                    dst_in4, addr, &rt);
 524                if (ret)
 525                        return ret;
 526
 527                if (resolve_neigh)
 528                        ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
 529
 530                if (addr->bound_dev_if) {
 531                        ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
 532                } else {
 533                        ndev = rt->dst.dev;
 534                        dev_hold(ndev);
 535                }
 536
 537                ip_rt_put(rt);
 538        } else {
 539                const struct sockaddr_in6 *dst_in6 =
 540                        (const struct sockaddr_in6 *)dst_in;
 541
 542                ret = addr6_resolve((struct sockaddr_in6 *)src_in,
 543                                    dst_in6, addr,
 544                                    &dst);
 545                if (ret)
 546                        return ret;
 547
 548                if (resolve_neigh)
 549                        ret = addr_resolve_neigh(dst, dst_in, addr, seq);
 550
 551                if (addr->bound_dev_if) {
 552                        ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
 553                } else {
 554                        ndev = dst->dev;
 555                        dev_hold(ndev);
 556                }
 557
 558                dst_release(dst);
 559        }
 560
 561        if (ndev->flags & IFF_LOOPBACK) {
 562                ret = rdma_translate_ip(dst_in, addr, NULL);
 563                /*
 564                 * Put the loopback device and get the translated
 565                 * device instead.
 566                 */
 567                dev_put(ndev);
 568                ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
 569        } else {
 570                addr->bound_dev_if = ndev->ifindex;
 571        }
 572        dev_put(ndev);
 573
 574        return ret;
 575}
 576
/*
 * Work handler for a single request (installed on req->work by
 * rdma_resolve_ip()).
 *
 * A request still at -ENODATA is resolved again.  If it still fails and
 * its deadline has passed it becomes -ETIMEDOUT; if it is still
 * -ENODATA before the deadline, the work is re-armed and the request
 * stays queued.  In every other case the request is unlinked, its
 * callback is invoked without the lock held, the client reference is
 * dropped and the request is freed.
 */
static void process_one_req(struct work_struct *_work)
{
        struct addr_req *req;
        struct sockaddr *src_in, *dst_in;

        mutex_lock(&lock);
        req = container_of(_work, struct addr_req, work.work);

        if (req->status == -ENODATA) {
                src_in = (struct sockaddr *)&req->src_addr;
                dst_in = (struct sockaddr *)&req->dst_addr;
                req->status = addr_resolve(src_in, dst_in, req->addr,
                                           true, req->seq);
                if (req->status && time_after_eq(jiffies, req->timeout)) {
                        req->status = -ETIMEDOUT;
                } else if (req->status == -ENODATA) {
                        /* requeue the work for retrying again */
                        set_timeout(&req->work, req->timeout);
                        mutex_unlock(&lock);
                        return;
                }
        }
        /* Final status reached: unlink before dropping the lock. */
        list_del(&req->list);
        mutex_unlock(&lock);

        req->callback(req->status, (struct sockaddr *)&req->src_addr,
                req->addr, req->context);
        put_client(req->client);
        kfree(req);
}
 607
/*
 * Handler of the global work item: rescan every pending request.
 *
 * Under the lock, each request still at -ENODATA is resolved again;
 * requests that remain -ENODATA before their deadline are re-armed and
 * left on req_list, all others are moved to a private done list.  After
 * the lock is dropped, each finished request has its own work cancelled,
 * its callback invoked, its client reference dropped, and is freed.
 */
static void process_req(struct work_struct *work)
{
        struct addr_req *req, *temp_req;
        struct sockaddr *src_in, *dst_in;
        struct list_head done_list;

        INIT_LIST_HEAD(&done_list);

        mutex_lock(&lock);
        list_for_each_entry_safe(req, temp_req, &req_list, list) {
                if (req->status == -ENODATA) {
                        src_in = (struct sockaddr *) &req->src_addr;
                        dst_in = (struct sockaddr *) &req->dst_addr;
                        req->status = addr_resolve(src_in, dst_in, req->addr,
                                                   true, req->seq);
                        if (req->status && time_after_eq(jiffies, req->timeout))
                                req->status = -ETIMEDOUT;
                        else if (req->status == -ENODATA) {
                                /* Not resolved yet: retry at the deadline. */
                                set_timeout(&req->work, req->timeout);
                                continue;
                        }
                }
                list_move_tail(&req->list, &done_list);
        }

        mutex_unlock(&lock);

        /* Callbacks run without the lock held. */
        list_for_each_entry_safe(req, temp_req, &done_list, list) {
                list_del(&req->list);
                /* It is safe to cancel other work items from this work item
                 * because at a time there can be only one work item running
                 * with this single threaded work queue.
                 */
                cancel_delayed_work(&req->work);
                req->callback(req->status, (struct sockaddr *) &req->src_addr,
                        req->addr, req->context);
                put_client(req->client);
                kfree(req);
        }
}
 648
 649int rdma_resolve_ip(struct rdma_addr_client *client,
 650                    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 651                    struct rdma_dev_addr *addr, int timeout_ms,
 652                    void (*callback)(int status, struct sockaddr *src_addr,
 653                                     struct rdma_dev_addr *addr, void *context),
 654                    void *context)
 655{
 656        struct sockaddr *src_in, *dst_in;
 657        struct addr_req *req;
 658        int ret = 0;
 659
 660        req = kzalloc(sizeof *req, GFP_KERNEL);
 661        if (!req)
 662                return -ENOMEM;
 663
 664        src_in = (struct sockaddr *) &req->src_addr;
 665        dst_in = (struct sockaddr *) &req->dst_addr;
 666
 667        if (src_addr) {
 668                if (src_addr->sa_family != dst_addr->sa_family) {
 669                        ret = -EINVAL;
 670                        goto err;
 671                }
 672
 673                memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 674        } else {
 675                src_in->sa_family = dst_addr->sa_family;
 676        }
 677
 678        memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
 679        req->addr = addr;
 680        req->callback = callback;
 681        req->context = context;
 682        req->client = client;
 683        atomic_inc(&client->refcount);
 684        INIT_DELAYED_WORK(&req->work, process_one_req);
 685        req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
 686
 687        req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
 688        switch (req->status) {
 689        case 0:
 690                req->timeout = jiffies;
 691                queue_req(req);
 692                break;
 693        case -ENODATA:
 694                req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
 695                queue_req(req);
 696                break;
 697        default:
 698                ret = req->status;
 699                atomic_dec(&client->refcount);
 700                goto err;
 701        }
 702        return ret;
 703err:
 704        kfree(req);
 705        return ret;
 706}
 707EXPORT_SYMBOL(rdma_resolve_ip);
 708
 709int rdma_resolve_ip_route(struct sockaddr *src_addr,
 710                          const struct sockaddr *dst_addr,
 711                          struct rdma_dev_addr *addr)
 712{
 713        struct sockaddr_storage ssrc_addr = {};
 714        struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
 715
 716        if (src_addr) {
 717                if (src_addr->sa_family != dst_addr->sa_family)
 718                        return -EINVAL;
 719
 720                memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 721        } else {
 722                src_in->sa_family = dst_addr->sa_family;
 723        }
 724
 725        return addr_resolve(src_in, dst_addr, addr, false, 0);
 726}
 727EXPORT_SYMBOL(rdma_resolve_ip_route);
 728
 729void rdma_addr_cancel(struct rdma_dev_addr *addr)
 730{
 731        struct addr_req *req, *temp_req;
 732
 733        mutex_lock(&lock);
 734        list_for_each_entry_safe(req, temp_req, &req_list, list) {
 735                if (req->addr == addr) {
 736                        req->status = -ECANCELED;
 737                        req->timeout = jiffies;
 738                        list_move(&req->list, &req_list);
 739                        set_timeout(&req->work, req->timeout);
 740                        break;
 741                }
 742        }
 743        mutex_unlock(&lock);
 744}
 745EXPORT_SYMBOL(rdma_addr_cancel);
 746
/*
 * Context that lets rdma_addr_find_l2_eth_by_grh() wait for the
 * asynchronous resolution completed by resolve_cb().
 */
struct resolve_cb_context {
        struct rdma_dev_addr *addr;     /* where resolve_cb() copies the result */
        struct completion comp;         /* signalled by resolve_cb() */
        int status;                     /* final status reported to resolve_cb() */
};
 752
 753static void resolve_cb(int status, struct sockaddr *src_addr,
 754             struct rdma_dev_addr *addr, void *context)
 755{
 756        if (!status)
 757                memcpy(((struct resolve_cb_context *)context)->addr,
 758                       addr, sizeof(struct rdma_dev_addr));
 759        ((struct resolve_cb_context *)context)->status = status;
 760        complete(&((struct resolve_cb_context *)context)->comp);
 761}
 762
 763int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 764                                 const union ib_gid *dgid,
 765                                 u8 *dmac, u16 *vlan_id, int *if_index,
 766                                 int *hoplimit)
 767{
 768        int ret = 0;
 769        struct rdma_dev_addr dev_addr;
 770        struct resolve_cb_context ctx;
 771        struct net_device *dev;
 772
 773        union {
 774                struct sockaddr     _sockaddr;
 775                struct sockaddr_in  _sockaddr_in;
 776                struct sockaddr_in6 _sockaddr_in6;
 777        } sgid_addr, dgid_addr;
 778
 779
 780        rdma_gid2ip(&sgid_addr._sockaddr, sgid);
 781        rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 782
 783        memset(&dev_addr, 0, sizeof(dev_addr));
 784        if (if_index)
 785                dev_addr.bound_dev_if = *if_index;
 786        dev_addr.net = &init_net;
 787
 788        ctx.addr = &dev_addr;
 789        init_completion(&ctx.comp);
 790        ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
 791                        &dev_addr, 1000, resolve_cb, &ctx);
 792        if (ret)
 793                return ret;
 794
 795        wait_for_completion(&ctx.comp);
 796
 797        ret = ctx.status;
 798        if (ret)
 799                return ret;
 800
 801        memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
 802        dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
 803        if (!dev)
 804                return -ENODEV;
 805        if (if_index)
 806                *if_index = dev_addr.bound_dev_if;
 807        if (vlan_id)
 808                *vlan_id = rdma_vlan_dev_vlan_id(dev);
 809        if (hoplimit)
 810                *hoplimit = dev_addr.hoplimit;
 811        dev_put(dev);
 812        return ret;
 813}
 814EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
 815
 816int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
 817{
 818        int ret = 0;
 819        struct rdma_dev_addr dev_addr;
 820        union {
 821                struct sockaddr     _sockaddr;
 822                struct sockaddr_in  _sockaddr_in;
 823                struct sockaddr_in6 _sockaddr_in6;
 824        } gid_addr;
 825
 826        rdma_gid2ip(&gid_addr._sockaddr, sgid);
 827
 828        memset(&dev_addr, 0, sizeof(dev_addr));
 829        dev_addr.net = &init_net;
 830        ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
 831        if (ret)
 832                return ret;
 833
 834        memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
 835        return ret;
 836}
 837EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
 838
 839static int netevent_callback(struct notifier_block *self, unsigned long event,
 840        void *ctx)
 841{
 842        if (event == NETEVENT_NEIGH_UPDATE) {
 843                struct neighbour *neigh = ctx;
 844
 845                if (neigh->nud_state & NUD_VALID)
 846                        set_timeout(&work, jiffies);
 847        }
 848        return 0;
 849}
 850
/* Notifier block registered in addr_init(); dispatches to netevent_callback(). */
static struct notifier_block nb = {
        .notifier_call = netevent_callback
};
 854
 855int addr_init(void)
 856{
 857        addr_wq = alloc_ordered_workqueue("ib_addr", WQ_MEM_RECLAIM);
 858        if (!addr_wq)
 859                return -ENOMEM;
 860
 861        register_netevent_notifier(&nb);
 862        rdma_addr_register_client(&self);
 863
 864        return 0;
 865}
 866
/*
 * Module teardown, the reverse of addr_init(): unregister the internal
 * client (which waits for its references to drop), unregister the
 * netevent notifier, then destroy the workqueue.
 */
void addr_cleanup(void)
{
        rdma_addr_unregister_client(&self);
        unregister_netevent_notifier(&nb);
        destroy_workqueue(addr_wq);
}
 873