linux/drivers/infiniband/core/addr.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
   3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
   4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
   5 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
   6 *
   7 * This software is available to you under a choice of one of two
   8 * licenses.  You may choose to be licensed under the terms of the GNU
   9 * General Public License (GPL) Version 2, available from the file
  10 * COPYING in the main directory of this source tree, or the
  11 * OpenIB.org BSD license below:
  12 *
  13 *     Redistribution and use in source and binary forms, with or
  14 *     without modification, are permitted provided that the following
  15 *     conditions are met:
  16 *
  17 *      - Redistributions of source code must retain the above
  18 *        copyright notice, this list of conditions and the following
  19 *        disclaimer.
  20 *
  21 *      - Redistributions in binary form must reproduce the above
  22 *        copyright notice, this list of conditions and the following
  23 *        disclaimer in the documentation and/or other materials
  24 *        provided with the distribution.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33 * SOFTWARE.
  34 */
  35
  36#include <linux/mutex.h>
  37#include <linux/inetdevice.h>
  38#include <linux/slab.h>
  39#include <linux/workqueue.h>
  40#include <linux/module.h>
  41#include <net/arp.h>
  42#include <net/neighbour.h>
  43#include <net/route.h>
  44#include <net/netevent.h>
  45#include <net/addrconf.h>
  46#include <net/ip6_route.h>
  47#include <rdma/ib_addr.h>
  48#include <rdma/ib.h>
  49
  50MODULE_AUTHOR("Sean Hefty");
  51MODULE_DESCRIPTION("IB Address Translation");
  52MODULE_LICENSE("Dual BSD/GPL");
  53
  54struct addr_req {
  55        struct list_head list;
  56        struct sockaddr_storage src_addr;
  57        struct sockaddr_storage dst_addr;
  58        struct rdma_dev_addr *addr;
  59        struct rdma_addr_client *client;
  60        void *context;
  61        void (*callback)(int status, struct sockaddr *src_addr,
  62                         struct rdma_dev_addr *addr, void *context);
  63        unsigned long timeout;
  64        int status;
  65};
  66
  67static void process_req(struct work_struct *work);
  68
  69static DEFINE_MUTEX(lock);
  70static LIST_HEAD(req_list);
  71static DECLARE_DELAYED_WORK(work, process_req);
  72static struct workqueue_struct *addr_wq;
  73
  74int rdma_addr_size(struct sockaddr *addr)
  75{
  76        switch (addr->sa_family) {
  77        case AF_INET:
  78                return sizeof(struct sockaddr_in);
  79        case AF_INET6:
  80                return sizeof(struct sockaddr_in6);
  81        case AF_IB:
  82                return sizeof(struct sockaddr_ib);
  83        default:
  84                return 0;
  85        }
  86}
  87EXPORT_SYMBOL(rdma_addr_size);
  88
  89static struct rdma_addr_client self;
  90
  91void rdma_addr_register_client(struct rdma_addr_client *client)
  92{
  93        atomic_set(&client->refcount, 1);
  94        init_completion(&client->comp);
  95}
  96EXPORT_SYMBOL(rdma_addr_register_client);
  97
  98static inline void put_client(struct rdma_addr_client *client)
  99{
 100        if (atomic_dec_and_test(&client->refcount))
 101                complete(&client->comp);
 102}
 103
 104void rdma_addr_unregister_client(struct rdma_addr_client *client)
 105{
 106        put_client(client);
 107        wait_for_completion(&client->comp);
 108}
 109EXPORT_SYMBOL(rdma_addr_unregister_client);
 110
 111int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 112                     const unsigned char *dst_dev_addr)
 113{
 114        dev_addr->dev_type = dev->type;
 115        memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
 116        memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
 117        if (dst_dev_addr)
 118                memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 119        dev_addr->bound_dev_if = dev->ifindex;
 120        return 0;
 121}
 122EXPORT_SYMBOL(rdma_copy_addr);
 123
 124int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
 125                      u16 *vlan_id)
 126{
 127        struct net_device *dev;
 128        int ret = -EADDRNOTAVAIL;
 129
 130        if (dev_addr->bound_dev_if) {
 131                dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
 132                if (!dev)
 133                        return -ENODEV;
 134                ret = rdma_copy_addr(dev_addr, dev, NULL);
 135                dev_put(dev);
 136                return ret;
 137        }
 138
 139        switch (addr->sa_family) {
 140        case AF_INET:
 141                dev = ip_dev_find(&init_net,
 142                        ((struct sockaddr_in *) addr)->sin_addr.s_addr);
 143
 144                if (!dev)
 145                        return ret;
 146
 147                ret = rdma_copy_addr(dev_addr, dev, NULL);
 148                if (vlan_id)
 149                        *vlan_id = rdma_vlan_dev_vlan_id(dev);
 150                dev_put(dev);
 151                break;
 152
 153#if IS_ENABLED(CONFIG_IPV6)
 154        case AF_INET6:
 155                rcu_read_lock();
 156                for_each_netdev_rcu(&init_net, dev) {
 157                        if (ipv6_chk_addr(&init_net,
 158                                          &((struct sockaddr_in6 *) addr)->sin6_addr,
 159                                          dev, 1)) {
 160                                ret = rdma_copy_addr(dev_addr, dev, NULL);
 161                                if (vlan_id)
 162                                        *vlan_id = rdma_vlan_dev_vlan_id(dev);
 163                                break;
 164                        }
 165                }
 166                rcu_read_unlock();
 167                break;
 168#endif
 169        }
 170        return ret;
 171}
 172EXPORT_SYMBOL(rdma_translate_ip);
 173
 174static void set_timeout(unsigned long time)
 175{
 176        unsigned long delay;
 177
 178        delay = time - jiffies;
 179        if ((long)delay <= 0)
 180                delay = 1;
 181
 182        mod_delayed_work(addr_wq, &work, delay);
 183}
 184
 185static void queue_req(struct addr_req *req)
 186{
 187        struct addr_req *temp_req;
 188
 189        mutex_lock(&lock);
 190        list_for_each_entry_reverse(temp_req, &req_list, list) {
 191                if (time_after_eq(req->timeout, temp_req->timeout))
 192                        break;
 193        }
 194
 195        list_add(&req->list, &temp_req->list);
 196
 197        if (req_list.next == &req->list)
 198                set_timeout(req->timeout);
 199        mutex_unlock(&lock);
 200}
 201
 202static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
 203{
 204        struct neighbour *n;
 205        int ret;
 206
 207        n = dst_neigh_lookup(dst, daddr);
 208
 209        rcu_read_lock();
 210        if (!n || !(n->nud_state & NUD_VALID)) {
 211                if (n)
 212                        neigh_event_send(n, NULL);
 213                ret = -ENODATA;
 214        } else {
 215                ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
 216        }
 217        rcu_read_unlock();
 218
 219        if (n)
 220                neigh_release(n);
 221
 222        return ret;
 223}
 224
 225static int addr4_resolve(struct sockaddr_in *src_in,
 226                         struct sockaddr_in *dst_in,
 227                         struct rdma_dev_addr *addr)
 228{
 229        __be32 src_ip = src_in->sin_addr.s_addr;
 230        __be32 dst_ip = dst_in->sin_addr.s_addr;
 231        struct rtable *rt;
 232        struct flowi4 fl4;
 233        int ret;
 234
 235        memset(&fl4, 0, sizeof(fl4));
 236        fl4.daddr = dst_ip;
 237        fl4.saddr = src_ip;
 238        fl4.flowi4_oif = addr->bound_dev_if;
 239        rt = ip_route_output_key(&init_net, &fl4);
 240        if (IS_ERR(rt)) {
 241                ret = PTR_ERR(rt);
 242                goto out;
 243        }
 244        src_in->sin_family = AF_INET;
 245        src_in->sin_addr.s_addr = fl4.saddr;
 246
 247        if (rt->dst.dev->flags & IFF_LOOPBACK) {
 248                ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
 249                if (!ret)
 250                        memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
 251                goto put;
 252        }
 253
 254        /* If the device does ARP internally, return 'done' */
 255        if (rt->dst.dev->flags & IFF_NOARP) {
 256                ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
 257                goto put;
 258        }
 259
 260        ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
 261put:
 262        ip_rt_put(rt);
 263out:
 264        return ret;
 265}
 266
 267#if IS_ENABLED(CONFIG_IPV6)
 268static int addr6_resolve(struct sockaddr_in6 *src_in,
 269                         struct sockaddr_in6 *dst_in,
 270                         struct rdma_dev_addr *addr)
 271{
 272        struct flowi6 fl6;
 273        struct dst_entry *dst;
 274        int ret;
 275
 276        memset(&fl6, 0, sizeof fl6);
 277        fl6.daddr = dst_in->sin6_addr;
 278        fl6.saddr = src_in->sin6_addr;
 279        fl6.flowi6_oif = addr->bound_dev_if;
 280
 281        dst = ip6_route_output(&init_net, NULL, &fl6);
 282        if ((ret = dst->error))
 283                goto put;
 284
 285        if (ipv6_addr_any(&fl6.saddr)) {
 286                ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
 287                                         &fl6.daddr, 0, &fl6.saddr);
 288                if (ret)
 289                        goto put;
 290
 291                src_in->sin6_family = AF_INET6;
 292                src_in->sin6_addr = fl6.saddr;
 293        }
 294
 295        if (dst->dev->flags & IFF_LOOPBACK) {
 296                ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
 297                if (!ret)
 298                        memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
 299                goto put;
 300        }
 301
 302        /* If the device does ARP internally, return 'done' */
 303        if (dst->dev->flags & IFF_NOARP) {
 304                ret = rdma_copy_addr(addr, dst->dev, NULL);
 305                goto put;
 306        }
 307
 308        ret = dst_fetch_ha(dst, addr, &fl6.daddr);
 309put:
 310        dst_release(dst);
 311        return ret;
 312}
 313#else
 314static int addr6_resolve(struct sockaddr_in6 *src_in,
 315                         struct sockaddr_in6 *dst_in,
 316                         struct rdma_dev_addr *addr)
 317{
 318        return -EADDRNOTAVAIL;
 319}
 320#endif
 321
 322static int addr_resolve(struct sockaddr *src_in,
 323                        struct sockaddr *dst_in,
 324                        struct rdma_dev_addr *addr)
 325{
 326        if (src_in->sa_family == AF_INET) {
 327                return addr4_resolve((struct sockaddr_in *) src_in,
 328                        (struct sockaddr_in *) dst_in, addr);
 329        } else
 330                return addr6_resolve((struct sockaddr_in6 *) src_in,
 331                        (struct sockaddr_in6 *) dst_in, addr);
 332}
 333
 334static void process_req(struct work_struct *work)
 335{
 336        struct addr_req *req, *temp_req;
 337        struct sockaddr *src_in, *dst_in;
 338        struct list_head done_list;
 339
 340        INIT_LIST_HEAD(&done_list);
 341
 342        mutex_lock(&lock);
 343        list_for_each_entry_safe(req, temp_req, &req_list, list) {
 344                if (req->status == -ENODATA) {
 345                        src_in = (struct sockaddr *) &req->src_addr;
 346                        dst_in = (struct sockaddr *) &req->dst_addr;
 347                        req->status = addr_resolve(src_in, dst_in, req->addr);
 348                        if (req->status && time_after_eq(jiffies, req->timeout))
 349                                req->status = -ETIMEDOUT;
 350                        else if (req->status == -ENODATA)
 351                                continue;
 352                }
 353                list_move_tail(&req->list, &done_list);
 354        }
 355
 356        if (!list_empty(&req_list)) {
 357                req = list_entry(req_list.next, struct addr_req, list);
 358                set_timeout(req->timeout);
 359        }
 360        mutex_unlock(&lock);
 361
 362        list_for_each_entry_safe(req, temp_req, &done_list, list) {
 363                list_del(&req->list);
 364                req->callback(req->status, (struct sockaddr *) &req->src_addr,
 365                        req->addr, req->context);
 366                put_client(req->client);
 367                kfree(req);
 368        }
 369}
 370
 371int rdma_resolve_ip(struct rdma_addr_client *client,
 372                    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 373                    struct rdma_dev_addr *addr, int timeout_ms,
 374                    void (*callback)(int status, struct sockaddr *src_addr,
 375                                     struct rdma_dev_addr *addr, void *context),
 376                    void *context)
 377{
 378        struct sockaddr *src_in, *dst_in;
 379        struct addr_req *req;
 380        int ret = 0;
 381
 382        req = kzalloc(sizeof *req, GFP_KERNEL);
 383        if (!req)
 384                return -ENOMEM;
 385
 386        src_in = (struct sockaddr *) &req->src_addr;
 387        dst_in = (struct sockaddr *) &req->dst_addr;
 388
 389        if (src_addr) {
 390                if (src_addr->sa_family != dst_addr->sa_family) {
 391                        ret = -EINVAL;
 392                        goto err;
 393                }
 394
 395                memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 396        } else {
 397                src_in->sa_family = dst_addr->sa_family;
 398        }
 399
 400        memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
 401        req->addr = addr;
 402        req->callback = callback;
 403        req->context = context;
 404        req->client = client;
 405        atomic_inc(&client->refcount);
 406
 407        req->status = addr_resolve(src_in, dst_in, addr);
 408        switch (req->status) {
 409        case 0:
 410                req->timeout = jiffies;
 411                queue_req(req);
 412                break;
 413        case -ENODATA:
 414                req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
 415                queue_req(req);
 416                break;
 417        default:
 418                ret = req->status;
 419                atomic_dec(&client->refcount);
 420                goto err;
 421        }
 422        return ret;
 423err:
 424        kfree(req);
 425        return ret;
 426}
 427EXPORT_SYMBOL(rdma_resolve_ip);
 428
 429void rdma_addr_cancel(struct rdma_dev_addr *addr)
 430{
 431        struct addr_req *req, *temp_req;
 432
 433        mutex_lock(&lock);
 434        list_for_each_entry_safe(req, temp_req, &req_list, list) {
 435                if (req->addr == addr) {
 436                        req->status = -ECANCELED;
 437                        req->timeout = jiffies;
 438                        list_move(&req->list, &req_list);
 439                        set_timeout(req->timeout);
 440                        break;
 441                }
 442        }
 443        mutex_unlock(&lock);
 444}
 445EXPORT_SYMBOL(rdma_addr_cancel);
 446
 447struct resolve_cb_context {
 448        struct rdma_dev_addr *addr;
 449        struct completion comp;
 450};
 451
 452static void resolve_cb(int status, struct sockaddr *src_addr,
 453             struct rdma_dev_addr *addr, void *context)
 454{
 455        memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
 456                                rdma_dev_addr));
 457        complete(&((struct resolve_cb_context *)context)->comp);
 458}
 459
 460int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
 461                               u16 *vlan_id)
 462{
 463        int ret = 0;
 464        struct rdma_dev_addr dev_addr;
 465        struct resolve_cb_context ctx;
 466        struct net_device *dev;
 467
 468        union {
 469                struct sockaddr     _sockaddr;
 470                struct sockaddr_in  _sockaddr_in;
 471                struct sockaddr_in6 _sockaddr_in6;
 472        } sgid_addr, dgid_addr;
 473
 474
 475        ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
 476        if (ret)
 477                return ret;
 478
 479        ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 480        if (ret)
 481                return ret;
 482
 483        memset(&dev_addr, 0, sizeof(dev_addr));
 484
 485        ctx.addr = &dev_addr;
 486        init_completion(&ctx.comp);
 487        ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
 488                        &dev_addr, 1000, resolve_cb, &ctx);
 489        if (ret)
 490                return ret;
 491
 492        wait_for_completion(&ctx.comp);
 493
 494        memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
 495        dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
 496        if (!dev)
 497                return -ENODEV;
 498        if (vlan_id)
 499                *vlan_id = rdma_vlan_dev_vlan_id(dev);
 500        dev_put(dev);
 501        return ret;
 502}
 503EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
 504
 505int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
 506{
 507        int ret = 0;
 508        struct rdma_dev_addr dev_addr;
 509        union {
 510                struct sockaddr     _sockaddr;
 511                struct sockaddr_in  _sockaddr_in;
 512                struct sockaddr_in6 _sockaddr_in6;
 513        } gid_addr;
 514
 515        ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
 516
 517        if (ret)
 518                return ret;
 519        memset(&dev_addr, 0, sizeof(dev_addr));
 520        ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
 521        if (ret)
 522                return ret;
 523
 524        memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
 525        return ret;
 526}
 527EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
 528
 529static int netevent_callback(struct notifier_block *self, unsigned long event,
 530        void *ctx)
 531{
 532        if (event == NETEVENT_NEIGH_UPDATE) {
 533                struct neighbour *neigh = ctx;
 534
 535                if (neigh->nud_state & NUD_VALID) {
 536                        set_timeout(jiffies);
 537                }
 538        }
 539        return 0;
 540}
 541
 542static struct notifier_block nb = {
 543        .notifier_call = netevent_callback
 544};
 545
 546static int __init addr_init(void)
 547{
 548        addr_wq = create_singlethread_workqueue("ib_addr");
 549        if (!addr_wq)
 550                return -ENOMEM;
 551
 552        register_netevent_notifier(&nb);
 553        rdma_addr_register_client(&self);
 554        return 0;
 555}
 556
 557static void __exit addr_cleanup(void)
 558{
 559        rdma_addr_unregister_client(&self);
 560        unregister_netevent_notifier(&nb);
 561        destroy_workqueue(addr_wq);
 562}
 563
 564module_init(addr_init);
 565module_exit(addr_cleanup);
 566