linux/drivers/infiniband/core/addr.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
   3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
   4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
   5 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
   6 *
   7 * This software is available to you under a choice of one of two
   8 * licenses.  You may choose to be licensed under the terms of the GNU
   9 * General Public License (GPL) Version 2, available from the file
  10 * COPYING in the main directory of this source tree, or the
  11 * OpenIB.org BSD license below:
  12 *
  13 *     Redistribution and use in source and binary forms, with or
  14 *     without modification, are permitted provided that the following
  15 *     conditions are met:
  16 *
  17 *      - Redistributions of source code must retain the above
  18 *        copyright notice, this list of conditions and the following
  19 *        disclaimer.
  20 *
  21 *      - Redistributions in binary form must reproduce the above
  22 *        copyright notice, this list of conditions and the following
  23 *        disclaimer in the documentation and/or other materials
  24 *        provided with the distribution.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33 * SOFTWARE.
  34 */
  35
  36#include <linux/mutex.h>
  37#include <linux/inetdevice.h>
  38#include <linux/slab.h>
  39#include <linux/workqueue.h>
  40#include <linux/module.h>
  41#include <net/arp.h>
  42#include <net/neighbour.h>
  43#include <net/route.h>
  44#include <net/netevent.h>
  45#include <net/addrconf.h>
  46#include <net/ip6_route.h>
  47#include <rdma/ib_addr.h>
  48#include <rdma/ib.h>
  49
/* Module metadata: author, one-line description and licence string. */
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
MODULE_LICENSE("Dual BSD/GPL");
  53
  54struct addr_req {
  55        struct list_head list;
  56        struct sockaddr_storage src_addr;
  57        struct sockaddr_storage dst_addr;
  58        struct rdma_dev_addr *addr;
  59        struct rdma_addr_client *client;
  60        void *context;
  61        void (*callback)(int status, struct sockaddr *src_addr,
  62                         struct rdma_dev_addr *addr, void *context);
  63        unsigned long timeout;
  64        int status;
  65};
  66
/* Delayed-work handler that retries and completes queued requests. */
static void process_req(struct work_struct *work);

/* Held by every function in this file that walks or modifies req_list. */
static DEFINE_MUTEX(lock);
/* Pending requests; queue_req() inserts them in ascending timeout order. */
static LIST_HEAD(req_list);
/* Delayed work item that runs process_req(); (re)armed by set_timeout(). */
static DECLARE_DELAYED_WORK(work, process_req);
/* Workqueue the delayed work is queued on; created in addr_init(). */
static struct workqueue_struct *addr_wq;
  73
  74int rdma_addr_size(struct sockaddr *addr)
  75{
  76        switch (addr->sa_family) {
  77        case AF_INET:
  78                return sizeof(struct sockaddr_in);
  79        case AF_INET6:
  80                return sizeof(struct sockaddr_in6);
  81        case AF_IB:
  82                return sizeof(struct sockaddr_ib);
  83        default:
  84                return 0;
  85        }
  86}
  87EXPORT_SYMBOL(rdma_addr_size);
  88
/*
 * Client used for requests issued from within this file
 * (rdma_addr_find_l2_eth_by_grh()); registered in addr_init() and
 * unregistered in addr_cleanup().
 */
static struct rdma_addr_client self;
  90
  91void rdma_addr_register_client(struct rdma_addr_client *client)
  92{
  93        atomic_set(&client->refcount, 1);
  94        init_completion(&client->comp);
  95}
  96EXPORT_SYMBOL(rdma_addr_register_client);
  97
  98static inline void put_client(struct rdma_addr_client *client)
  99{
 100        if (atomic_dec_and_test(&client->refcount))
 101                complete(&client->comp);
 102}
 103
 104void rdma_addr_unregister_client(struct rdma_addr_client *client)
 105{
 106        put_client(client);
 107        wait_for_completion(&client->comp);
 108}
 109EXPORT_SYMBOL(rdma_addr_unregister_client);
 110
 111int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 112                     const unsigned char *dst_dev_addr)
 113{
 114        dev_addr->dev_type = dev->type;
 115        memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
 116        memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
 117        if (dst_dev_addr)
 118                memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 119        dev_addr->bound_dev_if = dev->ifindex;
 120        return 0;
 121}
 122EXPORT_SYMBOL(rdma_copy_addr);
 123
 124int rdma_translate_ip(const struct sockaddr *addr,
 125                      struct rdma_dev_addr *dev_addr,
 126                      u16 *vlan_id)
 127{
 128        struct net_device *dev;
 129        int ret = -EADDRNOTAVAIL;
 130
 131        if (dev_addr->bound_dev_if) {
 132                dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
 133                if (!dev)
 134                        return -ENODEV;
 135                ret = rdma_copy_addr(dev_addr, dev, NULL);
 136                dev_put(dev);
 137                return ret;
 138        }
 139
 140        switch (addr->sa_family) {
 141        case AF_INET:
 142                dev = ip_dev_find(dev_addr->net,
 143                        ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
 144
 145                if (!dev)
 146                        return ret;
 147
 148                ret = rdma_copy_addr(dev_addr, dev, NULL);
 149                if (vlan_id)
 150                        *vlan_id = rdma_vlan_dev_vlan_id(dev);
 151                dev_put(dev);
 152                break;
 153#if IS_ENABLED(CONFIG_IPV6)
 154        case AF_INET6:
 155                rcu_read_lock();
 156                for_each_netdev_rcu(dev_addr->net, dev) {
 157                        if (ipv6_chk_addr(dev_addr->net,
 158                                          &((const struct sockaddr_in6 *)addr)->sin6_addr,
 159                                          dev, 1)) {
 160                                ret = rdma_copy_addr(dev_addr, dev, NULL);
 161                                if (vlan_id)
 162                                        *vlan_id = rdma_vlan_dev_vlan_id(dev);
 163                                break;
 164                        }
 165                }
 166                rcu_read_unlock();
 167                break;
 168#endif
 169        }
 170        return ret;
 171}
 172EXPORT_SYMBOL(rdma_translate_ip);
 173
 174static void set_timeout(unsigned long time)
 175{
 176        unsigned long delay;
 177
 178        delay = time - jiffies;
 179        if ((long)delay < 0)
 180                delay = 0;
 181
 182        mod_delayed_work(addr_wq, &work, delay);
 183}
 184
 185static void queue_req(struct addr_req *req)
 186{
 187        struct addr_req *temp_req;
 188
 189        mutex_lock(&lock);
 190        list_for_each_entry_reverse(temp_req, &req_list, list) {
 191                if (time_after_eq(req->timeout, temp_req->timeout))
 192                        break;
 193        }
 194
 195        list_add(&req->list, &temp_req->list);
 196
 197        if (req_list.next == &req->list)
 198                set_timeout(req->timeout);
 199        mutex_unlock(&lock);
 200}
 201
 202static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
 203                        const void *daddr)
 204{
 205        struct neighbour *n;
 206        int ret;
 207
 208        n = dst_neigh_lookup(dst, daddr);
 209
 210        rcu_read_lock();
 211        if (!n || !(n->nud_state & NUD_VALID)) {
 212                if (n)
 213                        neigh_event_send(n, NULL);
 214                ret = -ENODATA;
 215        } else {
 216                ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
 217        }
 218        rcu_read_unlock();
 219
 220        if (n)
 221                neigh_release(n);
 222
 223        return ret;
 224}
 225
 226static int addr4_resolve(struct sockaddr_in *src_in,
 227                         const struct sockaddr_in *dst_in,
 228                         struct rdma_dev_addr *addr,
 229                         struct rtable **prt)
 230{
 231        __be32 src_ip = src_in->sin_addr.s_addr;
 232        __be32 dst_ip = dst_in->sin_addr.s_addr;
 233        struct rtable *rt;
 234        struct flowi4 fl4;
 235        int ret;
 236
 237        memset(&fl4, 0, sizeof(fl4));
 238        fl4.daddr = dst_ip;
 239        fl4.saddr = src_ip;
 240        fl4.flowi4_oif = addr->bound_dev_if;
 241        rt = ip_route_output_key(addr->net, &fl4);
 242        if (IS_ERR(rt)) {
 243                ret = PTR_ERR(rt);
 244                goto out;
 245        }
 246        src_in->sin_family = AF_INET;
 247        src_in->sin_addr.s_addr = fl4.saddr;
 248
 249        /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
 250         * routable) and we could set the network type accordingly.
 251         */
 252        if (rt->rt_uses_gateway)
 253                addr->network = RDMA_NETWORK_IPV4;
 254
 255        addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
 256
 257        *prt = rt;
 258        return 0;
 259out:
 260        return ret;
 261}
 262
 263#if IS_ENABLED(CONFIG_IPV6)
 264static int addr6_resolve(struct sockaddr_in6 *src_in,
 265                         const struct sockaddr_in6 *dst_in,
 266                         struct rdma_dev_addr *addr,
 267                         struct dst_entry **pdst)
 268{
 269        struct flowi6 fl6;
 270        struct dst_entry *dst;
 271        struct rt6_info *rt;
 272        int ret;
 273
 274        memset(&fl6, 0, sizeof fl6);
 275        fl6.daddr = dst_in->sin6_addr;
 276        fl6.saddr = src_in->sin6_addr;
 277        fl6.flowi6_oif = addr->bound_dev_if;
 278
 279        dst = ip6_route_output(addr->net, NULL, &fl6);
 280        if ((ret = dst->error))
 281                goto put;
 282
 283        rt = (struct rt6_info *)dst;
 284        if (ipv6_addr_any(&fl6.saddr)) {
 285                ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
 286                                         &fl6.daddr, 0, &fl6.saddr);
 287                if (ret)
 288                        goto put;
 289
 290                src_in->sin6_family = AF_INET6;
 291                src_in->sin6_addr = fl6.saddr;
 292        }
 293
 294        /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
 295         * routable) and we could set the network type accordingly.
 296         */
 297        if (rt->rt6i_flags & RTF_GATEWAY)
 298                addr->network = RDMA_NETWORK_IPV6;
 299
 300        addr->hoplimit = ip6_dst_hoplimit(dst);
 301
 302        *pdst = dst;
 303        return 0;
 304put:
 305        dst_release(dst);
 306        return ret;
 307}
 308#else
 309static int addr6_resolve(struct sockaddr_in6 *src_in,
 310                         const struct sockaddr_in6 *dst_in,
 311                         struct rdma_dev_addr *addr,
 312                         struct dst_entry **pdst)
 313{
 314        return -EADDRNOTAVAIL;
 315}
 316#endif
 317
 318static int addr_resolve_neigh(struct dst_entry *dst,
 319                              const struct sockaddr *dst_in,
 320                              struct rdma_dev_addr *addr)
 321{
 322        if (dst->dev->flags & IFF_LOOPBACK) {
 323                int ret;
 324
 325                ret = rdma_translate_ip(dst_in, addr, NULL);
 326                if (!ret)
 327                        memcpy(addr->dst_dev_addr, addr->src_dev_addr,
 328                               MAX_ADDR_LEN);
 329
 330                return ret;
 331        }
 332
 333        /* If the device doesn't do ARP internally */
 334        if (!(dst->dev->flags & IFF_NOARP)) {
 335                const struct sockaddr_in *dst_in4 =
 336                        (const struct sockaddr_in *)dst_in;
 337                const struct sockaddr_in6 *dst_in6 =
 338                        (const struct sockaddr_in6 *)dst_in;
 339
 340                return dst_fetch_ha(dst, addr,
 341                                    dst_in->sa_family == AF_INET ?
 342                                    (const void *)&dst_in4->sin_addr.s_addr :
 343                                    (const void *)&dst_in6->sin6_addr);
 344        }
 345
 346        return rdma_copy_addr(addr, dst->dev, NULL);
 347}
 348
 349static int addr_resolve(struct sockaddr *src_in,
 350                        const struct sockaddr *dst_in,
 351                        struct rdma_dev_addr *addr,
 352                        bool resolve_neigh)
 353{
 354        struct net_device *ndev;
 355        struct dst_entry *dst;
 356        int ret;
 357
 358        if (src_in->sa_family == AF_INET) {
 359                struct rtable *rt = NULL;
 360                const struct sockaddr_in *dst_in4 =
 361                        (const struct sockaddr_in *)dst_in;
 362
 363                ret = addr4_resolve((struct sockaddr_in *)src_in,
 364                                    dst_in4, addr, &rt);
 365                if (ret)
 366                        return ret;
 367
 368                if (resolve_neigh)
 369                        ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
 370
 371                ndev = rt->dst.dev;
 372                dev_hold(ndev);
 373
 374                ip_rt_put(rt);
 375        } else {
 376                const struct sockaddr_in6 *dst_in6 =
 377                        (const struct sockaddr_in6 *)dst_in;
 378
 379                ret = addr6_resolve((struct sockaddr_in6 *)src_in,
 380                                    dst_in6, addr,
 381                                    &dst);
 382                if (ret)
 383                        return ret;
 384
 385                if (resolve_neigh)
 386                        ret = addr_resolve_neigh(dst, dst_in, addr);
 387
 388                ndev = dst->dev;
 389                dev_hold(ndev);
 390
 391                dst_release(dst);
 392        }
 393
 394        addr->bound_dev_if = ndev->ifindex;
 395        addr->net = dev_net(ndev);
 396        dev_put(ndev);
 397
 398        return ret;
 399}
 400
 401static void process_req(struct work_struct *work)
 402{
 403        struct addr_req *req, *temp_req;
 404        struct sockaddr *src_in, *dst_in;
 405        struct list_head done_list;
 406
 407        INIT_LIST_HEAD(&done_list);
 408
 409        mutex_lock(&lock);
 410        list_for_each_entry_safe(req, temp_req, &req_list, list) {
 411                if (req->status == -ENODATA) {
 412                        src_in = (struct sockaddr *) &req->src_addr;
 413                        dst_in = (struct sockaddr *) &req->dst_addr;
 414                        req->status = addr_resolve(src_in, dst_in, req->addr,
 415                                                   true);
 416                        if (req->status && time_after_eq(jiffies, req->timeout))
 417                                req->status = -ETIMEDOUT;
 418                        else if (req->status == -ENODATA)
 419                                continue;
 420                }
 421                list_move_tail(&req->list, &done_list);
 422        }
 423
 424        if (!list_empty(&req_list)) {
 425                req = list_entry(req_list.next, struct addr_req, list);
 426                set_timeout(req->timeout);
 427        }
 428        mutex_unlock(&lock);
 429
 430        list_for_each_entry_safe(req, temp_req, &done_list, list) {
 431                list_del(&req->list);
 432                req->callback(req->status, (struct sockaddr *) &req->src_addr,
 433                        req->addr, req->context);
 434                put_client(req->client);
 435                kfree(req);
 436        }
 437}
 438
 439int rdma_resolve_ip(struct rdma_addr_client *client,
 440                    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 441                    struct rdma_dev_addr *addr, int timeout_ms,
 442                    void (*callback)(int status, struct sockaddr *src_addr,
 443                                     struct rdma_dev_addr *addr, void *context),
 444                    void *context)
 445{
 446        struct sockaddr *src_in, *dst_in;
 447        struct addr_req *req;
 448        int ret = 0;
 449
 450        req = kzalloc(sizeof *req, GFP_KERNEL);
 451        if (!req)
 452                return -ENOMEM;
 453
 454        src_in = (struct sockaddr *) &req->src_addr;
 455        dst_in = (struct sockaddr *) &req->dst_addr;
 456
 457        if (src_addr) {
 458                if (src_addr->sa_family != dst_addr->sa_family) {
 459                        ret = -EINVAL;
 460                        goto err;
 461                }
 462
 463                memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 464        } else {
 465                src_in->sa_family = dst_addr->sa_family;
 466        }
 467
 468        memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
 469        req->addr = addr;
 470        req->callback = callback;
 471        req->context = context;
 472        req->client = client;
 473        atomic_inc(&client->refcount);
 474
 475        req->status = addr_resolve(src_in, dst_in, addr, true);
 476        switch (req->status) {
 477        case 0:
 478                req->timeout = jiffies;
 479                queue_req(req);
 480                break;
 481        case -ENODATA:
 482                req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
 483                queue_req(req);
 484                break;
 485        default:
 486                ret = req->status;
 487                atomic_dec(&client->refcount);
 488                goto err;
 489        }
 490        return ret;
 491err:
 492        kfree(req);
 493        return ret;
 494}
 495EXPORT_SYMBOL(rdma_resolve_ip);
 496
 497int rdma_resolve_ip_route(struct sockaddr *src_addr,
 498                          const struct sockaddr *dst_addr,
 499                          struct rdma_dev_addr *addr)
 500{
 501        struct sockaddr_storage ssrc_addr = {};
 502        struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
 503
 504        if (src_addr) {
 505                if (src_addr->sa_family != dst_addr->sa_family)
 506                        return -EINVAL;
 507
 508                memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 509        } else {
 510                src_in->sa_family = dst_addr->sa_family;
 511        }
 512
 513        return addr_resolve(src_in, dst_addr, addr, false);
 514}
 515EXPORT_SYMBOL(rdma_resolve_ip_route);
 516
 517void rdma_addr_cancel(struct rdma_dev_addr *addr)
 518{
 519        struct addr_req *req, *temp_req;
 520
 521        mutex_lock(&lock);
 522        list_for_each_entry_safe(req, temp_req, &req_list, list) {
 523                if (req->addr == addr) {
 524                        req->status = -ECANCELED;
 525                        req->timeout = jiffies;
 526                        list_move(&req->list, &req_list);
 527                        set_timeout(req->timeout);
 528                        break;
 529                }
 530        }
 531        mutex_unlock(&lock);
 532}
 533EXPORT_SYMBOL(rdma_addr_cancel);
 534
 535struct resolve_cb_context {
 536        struct rdma_dev_addr *addr;
 537        struct completion comp;
 538};
 539
 540static void resolve_cb(int status, struct sockaddr *src_addr,
 541             struct rdma_dev_addr *addr, void *context)
 542{
 543        memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
 544                                rdma_dev_addr));
 545        complete(&((struct resolve_cb_context *)context)->comp);
 546}
 547
 548int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 549                                 const union ib_gid *dgid,
 550                                 u8 *dmac, u16 *vlan_id, int *if_index,
 551                                 int *hoplimit)
 552{
 553        int ret = 0;
 554        struct rdma_dev_addr dev_addr;
 555        struct resolve_cb_context ctx;
 556        struct net_device *dev;
 557
 558        union {
 559                struct sockaddr     _sockaddr;
 560                struct sockaddr_in  _sockaddr_in;
 561                struct sockaddr_in6 _sockaddr_in6;
 562        } sgid_addr, dgid_addr;
 563
 564
 565        rdma_gid2ip(&sgid_addr._sockaddr, sgid);
 566        rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 567
 568        memset(&dev_addr, 0, sizeof(dev_addr));
 569        if (if_index)
 570                dev_addr.bound_dev_if = *if_index;
 571        dev_addr.net = &init_net;
 572
 573        ctx.addr = &dev_addr;
 574        init_completion(&ctx.comp);
 575        ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
 576                        &dev_addr, 1000, resolve_cb, &ctx);
 577        if (ret)
 578                return ret;
 579
 580        wait_for_completion(&ctx.comp);
 581
 582        memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
 583        dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
 584        if (!dev)
 585                return -ENODEV;
 586        if (if_index)
 587                *if_index = dev_addr.bound_dev_if;
 588        if (vlan_id)
 589                *vlan_id = rdma_vlan_dev_vlan_id(dev);
 590        if (hoplimit)
 591                *hoplimit = dev_addr.hoplimit;
 592        dev_put(dev);
 593        return ret;
 594}
 595EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
 596
 597int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
 598{
 599        int ret = 0;
 600        struct rdma_dev_addr dev_addr;
 601        union {
 602                struct sockaddr     _sockaddr;
 603                struct sockaddr_in  _sockaddr_in;
 604                struct sockaddr_in6 _sockaddr_in6;
 605        } gid_addr;
 606
 607        rdma_gid2ip(&gid_addr._sockaddr, sgid);
 608
 609        memset(&dev_addr, 0, sizeof(dev_addr));
 610        dev_addr.net = &init_net;
 611        ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
 612        if (ret)
 613                return ret;
 614
 615        memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
 616        return ret;
 617}
 618EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
 619
 620static int netevent_callback(struct notifier_block *self, unsigned long event,
 621        void *ctx)
 622{
 623        if (event == NETEVENT_NEIGH_UPDATE) {
 624                struct neighbour *neigh = ctx;
 625
 626                if (neigh->nud_state & NUD_VALID) {
 627                        set_timeout(jiffies);
 628                }
 629        }
 630        return 0;
 631}
 632
 633static struct notifier_block nb = {
 634        .notifier_call = netevent_callback
 635};
 636
 637static int __init addr_init(void)
 638{
 639        addr_wq = create_singlethread_workqueue("ib_addr");
 640        if (!addr_wq)
 641                return -ENOMEM;
 642
 643        register_netevent_notifier(&nb);
 644        rdma_addr_register_client(&self);
 645        return 0;
 646}
 647
/*
 * Module exit: undo addr_init() in reverse order.
 */
static void __exit addr_cleanup(void)
{
        /* Blocks until every request issued through 'self' has completed. */
        rdma_addr_unregister_client(&self);
        /* Stop netevent_callback() from re-arming the work. */
        unregister_netevent_notifier(&nb);
        destroy_workqueue(addr_wq);
}
 654
/* Hook addr_init()/addr_cleanup() up as the module entry and exit points. */
module_init(addr_init);
module_exit(addr_cleanup);
 657